From e51bee58ecf8f684abfb0f1f9ac1806fb47b5efd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 24 Apr 2022 22:32:45 +0200 Subject: [PATCH 0001/1097] Remove useless "install" from CMake (step 1) --- CMakeLists.txt | 1 - base/glibc-compatibility/CMakeLists.txt | 6 ---- base/harmful/CMakeLists.txt | 1 - tests/CMakeLists.txt | 26 --------------- tests/integration/CMakeLists.txt | 24 -------------- utils/CMakeLists.txt | 5 --- utils/config-processor/CMakeLists.txt | 2 -- utils/config-processor/config-processor.cpp | 35 --------------------- utils/report/CMakeLists.txt | 1 - 9 files changed, 101 deletions(-) delete mode 100644 tests/CMakeLists.txt delete mode 100644 tests/integration/CMakeLists.txt delete mode 100644 utils/config-processor/CMakeLists.txt delete mode 100644 utils/config-processor/config-processor.cpp delete mode 100644 utils/report/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index e8b6e9217d2..bffdd810686 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -577,7 +577,6 @@ include (cmake/print_flags.cmake) add_subdirectory (base) add_subdirectory (src) add_subdirectory (programs) -add_subdirectory (tests) add_subdirectory (utils) include (cmake/sanitize_target_link_libraries.cmake) diff --git a/base/glibc-compatibility/CMakeLists.txt b/base/glibc-compatibility/CMakeLists.txt index ef7ec6d7fc0..37423bb68a6 100644 --- a/base/glibc-compatibility/CMakeLists.txt +++ b/base/glibc-compatibility/CMakeLists.txt @@ -43,12 +43,6 @@ if (GLIBC_COMPATIBILITY) target_link_libraries(global-libs INTERFACE glibc-compatibility ${MEMCPY_LIBRARY}) - install( - TARGETS glibc-compatibility ${MEMCPY_LIBRARY} - EXPORT global - ARCHIVE DESTINATION lib - ) - message (STATUS "Some symbols from glibc will be replaced for compatibility") elseif (CLICKHOUSE_OFFICIAL_BUILD) diff --git a/base/harmful/CMakeLists.txt b/base/harmful/CMakeLists.txt index 399f6ecc625..c19661875be 100644 --- a/base/harmful/CMakeLists.txt +++ b/base/harmful/CMakeLists.txt @@ -1,2 +1 @@ add_library(harmful harmful.c) -install(TARGETS harmful EXPORT global ARCHIVE DESTINATION lib) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt deleted file mode 100644 index 22c89aaafa7..00000000000 --- a/tests/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -enable_testing() - -# Run tests with "ninja check" or "make check" -if (TARGET check) - message (STATUS "Target check already exists") -else () - include (${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake) -endif () - -option (ENABLE_CLICKHOUSE_TEST "Install clickhouse-test script and relevant tests scenarios" OFF) - -if (ENABLE_CLICKHOUSE_TEST) - install (PROGRAMS clickhouse-test DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - install ( - DIRECTORY queries performance config - DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse-test - USE_SOURCE_PERMISSIONS - COMPONENT clickhouse - PATTERN "CMakeLists.txt" EXCLUDE - PATTERN ".gitignore" EXCLUDE - ) -endif () - -if (ENABLE_TEST_INTEGRATION) - add_subdirectory (integration) -endif () diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt deleted file mode 100644 index 68c695f57a0..00000000000 --- a/tests/integration/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -if(CLICKHOUSE_SPLIT_BINARY) - set (TEST_USE_BINARIES CLICKHOUSE_TESTS_SERVER_BIN_PATH=${ClickHouse_BINARY_DIR}/programs/clickhouse-server CLICKHOUSE_TESTS_CLIENT_BIN_PATH=${ClickHouse_BINARY_DIR}/programs/clickhouse-client) -else() - set (TEST_USE_BINARIES 
CLICKHOUSE_TESTS_SERVER_BIN_PATH=${ClickHouse_BINARY_DIR}/programs/clickhouse CLICKHOUSE_TESTS_CLIENT_BIN_PATH=${ClickHouse_BINARY_DIR}/programs/clickhouse) -endif() - -find_program(DOCKER_CMD docker) -find_program(DOCKER_COMPOSE_CMD docker-compose) -find_program(PYTEST_CMD pytest) -find_program(SUDO_CMD sudo) - -# will mount only one binary to docker container - build with .so cant work -if(USE_STATIC_LIBRARIES AND DOCKER_CMD) - if(INTEGRATION_USE_RUNNER AND SUDO_CMD) - add_test(NAME integration-runner WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND ${SUDO_CMD} ${CMAKE_CURRENT_SOURCE_DIR}/runner --binary ${ClickHouse_BINARY_DIR}/programs/clickhouse --configs-dir ${ClickHouse_SOURCE_DIR}/programs/server/) - message(STATUS "Using tests in docker with runner SUDO=${SUDO_CMD}; DOCKER=${DOCKER_CMD};") - endif() - if(NOT INTEGRATION_USE_RUNNER AND DOCKER_COMPOSE_CMD AND PYTEST_CMD) - # To run one test with debug: - # cmake . -DPYTEST_OPT="-ss;test_cluster_copier" - add_test(NAME integration-pytest WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND env ${TEST_USE_BINARIES} "CLICKHOUSE_TESTS_BASE_CONFIG_DIR=${ClickHouse_SOURCE_DIR}/programs/server/" "CLICKHOUSE_TESTS_CONFIG_DIR=${ClickHouse_SOURCE_DIR}/tests/config/" ${PYTEST_STARTER} ${PYTEST_CMD} ${PYTEST_OPT}) - message(STATUS "Using tests in docker DOCKER=${DOCKER_CMD}; DOCKER_COMPOSE=${DOCKER_COMPOSE_CMD}; PYTEST=${PYTEST_STARTER} ${PYTEST_CMD} ${PYTEST_OPT}") - endif() -endif() diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 51300472ed1..d4f22d8065d 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -9,11 +9,6 @@ else() endif() include(../cmake/limit_jobs.cmake) -# Utils used in package -add_subdirectory (config-processor) -add_subdirectory (report) - -# Not used in package if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) add_subdirectory (compressor) add_subdirectory (iotest) diff --git a/utils/config-processor/CMakeLists.txt b/utils/config-processor/CMakeLists.txt deleted file mode 100644 index 76c10b5f2fd..00000000000 --- a/utils/config-processor/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -add_executable (config-processor config-processor.cpp) -target_link_libraries(config-processor PRIVATE clickhouse_common_config_no_zookeeper_log) diff --git a/utils/config-processor/config-processor.cpp b/utils/config-processor/config-processor.cpp deleted file mode 100644 index 242a6782b3b..00000000000 --- a/utils/config-processor/config-processor.cpp +++ /dev/null @@ -1,35 +0,0 @@ -#include -#include - -int main(int argc, char ** argv) -{ - try - { - if (argc != 2) - { - std::cerr << "usage: " << argv[0] << " path" << std::endl; - return 3; - } - - DB::ConfigProcessor processor(argv[1], false, true); - DB::XMLDocumentPtr document = processor.processConfig(); - Poco::XML::DOMWriter().writeNode(std::cout, document); - } - catch (Poco::Exception & e) - { - std::cerr << "Exception: " << e.displayText() << std::endl; - return 1; - } - catch (std::exception & e) - { - std::cerr << "std::exception: " << e.what() << std::endl; - return 3; - } - catch (...) 
- { - std::cerr << "Some exception" << std::endl; - return 2; - } - - return 0; -} diff --git a/utils/report/CMakeLists.txt b/utils/report/CMakeLists.txt deleted file mode 100644 index e39dd155b15..00000000000 --- a/utils/report/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -install (PROGRAMS clickhouse-report DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) From f40b2b0ffbb7c7b7dd7f45a665d036395ae624fa Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 27 Jul 2022 01:26:40 +0200 Subject: [PATCH 0002/1097] WIP: window function parallel execution --- src/Processors/QueryPlan/WindowStep.cpp | 17 ++- .../ScatterByPartitionTransform.cpp | 127 ++++++++++++++++++ .../Transforms/ScatterByPartitionTransform.h | 34 +++++ 3 files changed, 177 insertions(+), 1 deletion(-) create mode 100644 src/Processors/Transforms/ScatterByPartitionTransform.cpp create mode 100644 src/Processors/Transforms/ScatterByPartitionTransform.h diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp index b67b394b57b..67edf13997e 100644 --- a/src/Processors/QueryPlan/WindowStep.cpp +++ b/src/Processors/QueryPlan/WindowStep.cpp @@ -7,6 +7,11 @@ #include #include +#include "Columns/ColumnConst.h" +#include "DataTypes/DataTypesNumber.h" +#include "Functions/FunctionFactory.h" +#include "Processors/Transforms/ScatterByPartitionTransform.h" + namespace DB { @@ -64,7 +69,17 @@ void WindowStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ // This resize is needed for cases such as `over ()` when we don't have a // sort node, and the input might have multiple streams. The sort node would // have resized it. - pipeline.resize(1); + Block header = pipeline.getHeader(); + if (!window_description.partition_by.empty()) + { + ColumnNumbers key_columns; + key_columns.reserve(window_description.partition_by.size()); + for (auto & col : window_description.partition_by) + { + key_columns.push_back(header.getPositionByName(col.column_name)); + } + pipeline.addTransform(std::make_shared(header, pipeline.getNumThreads(), std::move(key_columns))); + } pipeline.addSimpleTransform( [&](const Block & /*header*/) diff --git a/src/Processors/Transforms/ScatterByPartitionTransform.cpp b/src/Processors/Transforms/ScatterByPartitionTransform.cpp new file mode 100644 index 00000000000..ec2add1b9d0 --- /dev/null +++ b/src/Processors/Transforms/ScatterByPartitionTransform.cpp @@ -0,0 +1,127 @@ +#include +#include +#include + +#include "Common/PODArray.h" +#include "Core/ColumnNumbers.h" + +namespace DB +{ +ScatterByPartitionTransform::ScatterByPartitionTransform(Block header, size_t output_size_, ColumnNumbers key_columns_) + : IProcessor(InputPorts{header}, OutputPorts{output_size_, header}) + , output_size(output_size_) + , key_columns(std::move(key_columns_)) + , hash(0) +{} + +IProcessor::Status ScatterByPartitionTransform::prepare() +{ + auto & input = getInputs().front(); + + /// Check all outputs are finished or ready to get data. + + bool all_finished = true; + for (auto & output : outputs) + { + if (output.isFinished()) + continue; + + all_finished = false; + } + + if (all_finished) + { + input.close(); + return Status::Finished; + } + + if (!all_outputs_processed) + return Status::Ready; + + /// Try get chunk from input. 
+ + if (input.isFinished()) + { + for (auto & output : outputs) + output.finish(); + + return Status::Finished; + } + + input.setNeeded(); + if (!input.hasData()) + return Status::NeedData; + + chunk = input.pull(); + has_data = true; + was_output_processed.assign(outputs.size(), false); + + return Status::Ready; +} + +void ScatterByPartitionTransform::work() +{ + if (all_outputs_processed) + generateOutputChunks(); + all_outputs_processed = true; + + size_t chunk_number = 0; + for (auto & output : outputs) + { + auto & was_processed = was_output_processed[chunk_number]; + auto & output_chunk = output_chunks[chunk_number]; + ++chunk_number; + + if (was_processed) + continue; + + if (output.isFinished()) + continue; + + if (!output.canPush()) + { + all_outputs_processed = false; + continue; + } + + output.push(std::move(output_chunk)); + was_processed = true; + } + + if (all_outputs_processed) + { + has_data = false; + output_chunks.clear(); + } +} + +void ScatterByPartitionTransform::generateOutputChunks() +{ + auto num_rows = chunk.getNumRows(); + const auto & columns = chunk.getColumns(); + + hash.reset(num_rows); + + for (const auto & column_number : key_columns) + columns[column_number]->updateWeakHash32(hash); + + const auto & hash_data = hash.getData(); + IColumn::Selector selector(num_rows); + + for (size_t row = 0; row < num_rows; ++row) + { + selector[row] = hash_data[row]; /// [0, 2^32) + selector[row] *= output_size; /// [0, output_size * 2^32), selector stores 64 bit values. + selector[row] >>= 32u; /// [0, output_size) + } + + output_chunks.resize(output_size); + for (const auto & column : columns) + { + auto filtered_columns = column->scatter(output_size, selector); + for (size_t i = 0; i < output_size; ++i) + output_chunks[i].addColumn(std::move(filtered_columns[i])); + } +} + +} diff --git a/src/Processors/Transforms/ScatterByPartitionTransform.h b/src/Processors/Transforms/ScatterByPartitionTransform.h new file mode 100644 index 00000000000..78a86d4a1db --- /dev/null +++ b/src/Processors/Transforms/ScatterByPartitionTransform.h @@ -0,0 +1,34 @@ +#pragma once +#include "Common/WeakHash.h" +#include "Core/ColumnNumbers.h" +#include "Processors/IProcessor.h" + +namespace DB +{ + +struct ScatterByPartitionTransform : IProcessor +{ + ScatterByPartitionTransform(Block header, size_t output_size_, ColumnNumbers key_columns_); + + String getName() const override { return "ScatterByPartitionTransform"; } + + Status prepare() override; + void work() override; + +private: + + void generateOutputChunks(); + + size_t output_size; + ColumnNumbers key_columns; + + bool has_data = false; + bool all_outputs_processed = true; + std::vector was_output_processed; + Chunk chunk; + + WeakHash32 hash; + Chunks output_chunks; +}; + +} From 64d71b0ce0c8dbb0d4907f8728896f8f98c7fea1 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 29 Jul 2022 19:44:10 +0200 Subject: [PATCH 0003/1097] Do scatter before sort --- src/Interpreters/InterpreterSelectQuery.cpp | 1 + src/Processors/QueryPlan/SortingStep.cpp | 61 ++++++++++++++++++- src/Processors/QueryPlan/SortingStep.h | 17 ++++++ src/Processors/QueryPlan/WindowStep.cpp | 13 +--- .../ScatterByPartitionTransform.cpp | 16 +++-- .../Transforms/ScatterByPartitionTransform.h | 6 +- 6 files changed, 94 insertions(+), 20 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index de01115abec..cc5fc5bd1bb 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ 
b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2561,6 +2561,7 @@ void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan) auto sorting_step = std::make_unique( query_plan.getCurrentDataStream(), window.full_sort_description, + window.partition_by, settings.max_block_size, 0 /* LIMIT */, SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode), diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 46588ada225..2760c04dc19 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -9,6 +10,9 @@ #include #include +#include "Processors/ResizeProcessor.h" +#include "Processors/Transforms/ScatterByPartitionTransform.h" + namespace DB { @@ -55,6 +59,15 @@ SortingStep::SortingStep( output_stream->sort_mode = DataStream::SortMode::Stream; } +SortingStep::SortingStep(const DataStream& input_stream, const SortDescription& description_, + const SortDescription& partition_by_description_, size_t max_block_size_, UInt64 limit_, SizeLimits size_limits_, + size_t max_bytes_before_remerge_, double remerge_lowered_memory_bytes_ratio_, + size_t max_bytes_before_external_sort_, VolumePtr tmp_volume_, size_t min_free_disk_space_) + : SortingStep(input_stream, description_, max_block_size_, limit_, size_limits_, max_bytes_before_remerge_, remerge_lowered_memory_bytes_ratio_, max_bytes_before_external_sort_, tmp_volume_, min_free_disk_space_) +{ + partition_by_description = partition_by_description_; +} + SortingStep::SortingStep( const DataStream & input_stream_, SortDescription prefix_description_, @@ -160,6 +173,52 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build } else if (type == Type::Full) { + size_t threads = pipeline.getNumThreads(); + size_t streams = pipeline.getNumStreams(); + + if (!partition_by_description.empty() && threads > 1) + { + Block stream_header = pipeline.getHeader(); + + ColumnNumbers key_columns; + key_columns.reserve(partition_by_description.size()); + for (auto & col : partition_by_description) + { + key_columns.push_back(stream_header.getPositionByName(col.column_name)); + } + + pipeline.transform([&](OutputPortRawPtrs ports) + { + Processors processors; + for (auto * port : ports) + { + auto scatter = std::make_shared(stream_header, threads, key_columns); + connect(*port, scatter->getInputs().front()); + processors.push_back(scatter); + } + return processors; + }); + + if (streams > 1) + { + pipeline.transform([&](OutputPortRawPtrs ports) + { + Processors processors; + for (size_t i = 0; i < threads; ++i) + { + size_t output_it = i; + auto resize = std::make_shared(ports[output_it]->getHeader(), streams, 1); + auto & inputs = resize->getInputs(); + + for (auto input_it = inputs.begin(); input_it != inputs.end(); output_it += threads, ++input_it) + connect(*ports[output_it], *input_it); + processors.push_back(resize); + } + return processors; + }); + } + } + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { if (stream_type != QueryPipelineBuilder::StreamType::Main) @@ -206,7 +265,7 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build }); /// If there are several streams, then we merge them into one - if (pipeline.getNumStreams() > 1) + if (pipeline.getNumStreams() > 1 && partition_by_description.empty()) { auto transform = std::make_shared( 
pipeline.getHeader(), diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h index ce78bb863bf..b67fd748c1f 100644 --- a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -24,6 +24,20 @@ public: VolumePtr tmp_volume_, size_t min_free_disk_space_); + /// Full with partitioning + SortingStep( + const DataStream & input_stream, + const SortDescription & description_, + const SortDescription & partition_by_description_, + size_t max_block_size_, + UInt64 limit_, + SizeLimits size_limits_, + size_t max_bytes_before_remerge_, + double remerge_lowered_memory_bytes_ratio_, + size_t max_bytes_before_external_sort_, + VolumePtr tmp_volume_, + size_t min_free_disk_space_); + /// FinishSorting SortingStep( const DataStream & input_stream_, @@ -67,6 +81,9 @@ private: SortDescription prefix_description; SortDescription result_description; + + SortDescription partition_by_description; + size_t max_block_size; UInt64 limit; SizeLimits size_limits; diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp index 67edf13997e..545bb5f4f79 100644 --- a/src/Processors/QueryPlan/WindowStep.cpp +++ b/src/Processors/QueryPlan/WindowStep.cpp @@ -69,17 +69,8 @@ void WindowStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ // This resize is needed for cases such as `over ()` when we don't have a // sort node, and the input might have multiple streams. The sort node would // have resized it. - Block header = pipeline.getHeader(); - if (!window_description.partition_by.empty()) - { - ColumnNumbers key_columns; - key_columns.reserve(window_description.partition_by.size()); - for (auto & col : window_description.partition_by) - { - key_columns.push_back(header.getPositionByName(col.column_name)); - } - pipeline.addTransform(std::make_shared(header, pipeline.getNumThreads(), std::move(key_columns))); - } + if (window_description.full_sort_description.empty()) + pipeline.resize(1); pipeline.addSimpleTransform( [&](const Block & /*header*/) diff --git a/src/Processors/Transforms/ScatterByPartitionTransform.cpp b/src/Processors/Transforms/ScatterByPartitionTransform.cpp index ec2add1b9d0..336371dae8b 100644 --- a/src/Processors/Transforms/ScatterByPartitionTransform.cpp +++ b/src/Processors/Transforms/ScatterByPartitionTransform.cpp @@ -1,9 +1,7 @@ -#include -#include #include -#include "Common/PODArray.h" -#include "Core/ColumnNumbers.h" +#include +#include namespace DB { @@ -36,8 +34,16 @@ IProcessor::Status ScatterByPartitionTransform::prepare() } if (!all_outputs_processed) + { + auto output_it = outputs.begin(); + bool can_push = false; + for (size_t i = 0; i < output_size; ++i, ++output_it) + if (!was_output_processed[i] && output_it->canPush()) + can_push = true; + if (!can_push) + return Status::PortFull; return Status::Ready; - + } /// Try get chunk from input. 
if (input.isFinished()) diff --git a/src/Processors/Transforms/ScatterByPartitionTransform.h b/src/Processors/Transforms/ScatterByPartitionTransform.h index 78a86d4a1db..327f6dd62b4 100644 --- a/src/Processors/Transforms/ScatterByPartitionTransform.h +++ b/src/Processors/Transforms/ScatterByPartitionTransform.h @@ -1,7 +1,7 @@ #pragma once -#include "Common/WeakHash.h" -#include "Core/ColumnNumbers.h" -#include "Processors/IProcessor.h" +#include +#include +#include namespace DB { From cd96bcad98f89b77b39bb7170f6ac4b27ebccb51 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 1 Aug 2022 16:33:06 +0200 Subject: [PATCH 0004/1097] Produce several outputs in FinishSort --- src/Processors/QueryPlan/SortingStep.cpp | 99 +++++++++++++----------- src/Processors/QueryPlan/SortingStep.h | 1 + 2 files changed, 54 insertions(+), 46 deletions(-) diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 2760c04dc19..d0170026f0f 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -124,12 +124,61 @@ void SortingStep::convertToFinishSorting(SortDescription prefix_description_) prefix_description = std::move(prefix_description_); } +void SortingStep::scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline) +{ + size_t threads = pipeline.getNumThreads(); + size_t streams = pipeline.getNumStreams(); + + if (!partition_by_description.empty() && threads > 1) + { + Block stream_header = pipeline.getHeader(); + + ColumnNumbers key_columns; + key_columns.reserve(partition_by_description.size()); + for (auto & col : partition_by_description) + { + key_columns.push_back(stream_header.getPositionByName(col.column_name)); + } + + pipeline.transform([&](OutputPortRawPtrs ports) + { + Processors processors; + for (auto * port : ports) + { + auto scatter = std::make_shared(stream_header, threads, key_columns); + connect(*port, scatter->getInputs().front()); + processors.push_back(scatter); + } + return processors; + }); + + if (streams > 1) + { + pipeline.transform([&](OutputPortRawPtrs ports) + { + Processors processors; + for (size_t i = 0; i < threads; ++i) + { + size_t output_it = i; + auto resize = std::make_shared(ports[output_it]->getHeader(), streams, 1); + auto & inputs = resize->getInputs(); + + for (auto input_it = inputs.begin(); input_it != inputs.end(); output_it += threads, ++input_it) + connect(*ports[output_it], *input_it); + processors.push_back(resize); + } + return processors; + }); + } + } +} + void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { if (type == Type::FinishSorting) { bool need_finish_sorting = (prefix_description.size() < result_description.size()); - if (pipeline.getNumStreams() > 1) + if (pipeline.getNumStreams() > 1 && partition_by_description.empty()) { UInt64 limit_for_merging = (need_finish_sorting ? 
0 : limit); auto transform = std::make_shared( @@ -143,6 +192,8 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build pipeline.addTransform(std::move(transform)); } + scatterByPartitionIfNeeded(pipeline); + if (need_finish_sorting) { pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr @@ -173,51 +224,7 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build } else if (type == Type::Full) { - size_t threads = pipeline.getNumThreads(); - size_t streams = pipeline.getNumStreams(); - - if (!partition_by_description.empty() && threads > 1) - { - Block stream_header = pipeline.getHeader(); - - ColumnNumbers key_columns; - key_columns.reserve(partition_by_description.size()); - for (auto & col : partition_by_description) - { - key_columns.push_back(stream_header.getPositionByName(col.column_name)); - } - - pipeline.transform([&](OutputPortRawPtrs ports) - { - Processors processors; - for (auto * port : ports) - { - auto scatter = std::make_shared(stream_header, threads, key_columns); - connect(*port, scatter->getInputs().front()); - processors.push_back(scatter); - } - return processors; - }); - - if (streams > 1) - { - pipeline.transform([&](OutputPortRawPtrs ports) - { - Processors processors; - for (size_t i = 0; i < threads; ++i) - { - size_t output_it = i; - auto resize = std::make_shared(ports[output_it]->getHeader(), streams, 1); - auto & inputs = resize->getInputs(); - - for (auto input_it = inputs.begin(); input_it != inputs.end(); output_it += threads, ++input_it) - connect(*ports[output_it], *input_it); - processors.push_back(resize); - } - return processors; - }); - } - } + scatterByPartitionIfNeeded(pipeline); pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h index b67fd748c1f..e3d41f921d7 100644 --- a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -68,6 +68,7 @@ public: void convertToFinishSorting(SortDescription prefix_description); private: + void scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline); void updateOutputStream() override; enum class Type From 8ad8e32f232106a242ff6e9c4b65c720be3e284c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 27 Feb 2023 05:26:38 +0100 Subject: [PATCH 0005/1097] Tune the concurrency settings --- programs/server/config.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index 85cb299e188..3787415c553 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -307,10 +307,10 @@ Query can upscale to desired number of threads during execution if more threads become available. 
--> 0 - 0 + 2 - 100 + 1000 + 0 + diff --git a/tests/integration/test_overcommit_tracker/test.py b/tests/integration/test_overcommit_tracker/test.py index f758c3629b3..e364ec3fe4d 100644 --- a/tests/integration/test_overcommit_tracker/test.py +++ b/tests/integration/test_overcommit_tracker/test.py @@ -6,6 +6,9 @@ cluster = ClickHouseCluster(__file__) node = cluster.add_instance( "node", + main_configs=[ + "configs/config.d/config.xml" + ], user_configs=[ "configs/users.d/users.xml", ], From 8f310fb1525479d9cffc1a05a7bd64e8211e0922 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 Mar 2023 21:44:01 +0100 Subject: [PATCH 0016/1097] Update a test --- tests/queries/0_stateless/00963_achimbab.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/00963_achimbab.sql b/tests/queries/0_stateless/00963_achimbab.sql index 758ecf5acf3..60fd1911e85 100644 --- a/tests/queries/0_stateless/00963_achimbab.sql +++ b/tests/queries/0_stateless/00963_achimbab.sql @@ -1,3 +1,5 @@ +-- Tags: no-parallel, long + SET output_format_write_statistics = 0; select From e2fc2e31ca7d8dfc9cd078118da1e4f7eb6a1d75 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 Mar 2023 21:48:21 +0100 Subject: [PATCH 0017/1097] Fix typo --- .../0_stateless/01605_adaptive_granularity_block_borders.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index ca7d0f3c950..340950f16fe 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -5,8 +5,8 @@ SET allow_prefetched_read_pool_for_remote_filesystem=0; DROP TABLE IF EXISTS adaptive_table; ---- If granularity of consequent blocks differs a lot, then adaptive ---- granularity will adjust amout of marks correctly. Data for test empirically +--- If the granularity of consequent blocks differs a lot, then adaptive +--- granularity will adjust the amount of marks correctly. Data for test empirically --- derived, it's quite hard to get good parameters. 
CREATE TABLE adaptive_table( From 17a0943b30e0a42b395c499f70f7934d920fed8a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 Mar 2023 21:49:02 +0100 Subject: [PATCH 0018/1097] Update a test --- tests/queries/0_stateless/01091_num_threads.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/01091_num_threads.sql b/tests/queries/0_stateless/01091_num_threads.sql index 0d2a66a8c2e..9fc82b470c9 100644 --- a/tests/queries/0_stateless/01091_num_threads.sql +++ b/tests/queries/0_stateless/01091_num_threads.sql @@ -1,3 +1,5 @@ +-- Tags: no-parallel + set log_queries=1; set log_query_threads=1; set max_threads=0; From 0be8cd5bce1488cb220f4ab64bf7fadd69ca087a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 17 Mar 2023 21:03:25 +0000 Subject: [PATCH 0019/1097] Automatic style fix --- tests/integration/test_overcommit_tracker/test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration/test_overcommit_tracker/test.py b/tests/integration/test_overcommit_tracker/test.py index e364ec3fe4d..a2ddbbda9fe 100644 --- a/tests/integration/test_overcommit_tracker/test.py +++ b/tests/integration/test_overcommit_tracker/test.py @@ -6,9 +6,7 @@ cluster = ClickHouseCluster(__file__) node = cluster.add_instance( "node", - main_configs=[ - "configs/config.d/config.xml" - ], + main_configs=["configs/config.d/config.xml"], user_configs=[ "configs/users.d/users.xml", ], From b680b1ac1ed8623e5dc57707a5fec74726f0c9fb Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 11 Jun 2023 23:58:49 +0000 Subject: [PATCH 0020/1097] propagate insertion values storage into subquery --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 36 +++++++++---------- src/Analyzer/Passes/QueryAnalysisPass.h | 4 ++- src/Analyzer/QueryTreePassManager.cpp | 4 +-- src/Analyzer/QueryTreePassManager.h | 3 +- .../InterpreterSelectQueryAnalyzer.cpp | 5 ++- 5 files changed, 29 insertions(+), 23 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index aa915e48d35..4ef01524ac8 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1056,7 +1056,7 @@ private: class QueryAnalyzer { public: - void resolve(QueryTreeNodePtr node, const QueryTreeNodePtr & table_expression, ContextPtr context) + void resolve(QueryTreeNodePtr node, const QueryTreeNodePtr & table_expression, ContextPtr context, const StoragePtr & storage = nullptr) { IdentifierResolveScope scope(node, nullptr /*parent_scope*/); @@ -1073,7 +1073,7 @@ public: throw Exception(ErrorCodes::LOGICAL_ERROR, "For query analysis table expression must be empty"); - resolveQuery(node, scope); + resolveQuery(node, scope, storage); break; } case QueryTreeNodeType::UNION: @@ -1187,7 +1187,7 @@ private: QueryTreeNodePtr tryGetLambdaFromSQLUserDefinedFunctions(const std::string & function_name, ContextPtr context); - void evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & query_tree_node, IdentifierResolveScope & scope); + void evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & query_tree_node, IdentifierResolveScope & scope, const StoragePtr & storage = nullptr); static void mergeWindowWithParentWindow(const QueryTreeNodePtr & window_node, const QueryTreeNodePtr & parent_window_node, IdentifierResolveScope & scope); @@ -1292,9 +1292,9 @@ private: ProjectionNames resolveFunction(QueryTreeNodePtr & function_node, IdentifierResolveScope & scope); - ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, 
IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression); + ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, const StoragePtr & storage = nullptr); - ProjectionNames resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression); + ProjectionNames resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, const StoragePtr & storage = nullptr); ProjectionNames resolveSortNodeList(QueryTreeNodePtr & sort_node_list, IdentifierResolveScope & scope); @@ -1302,7 +1302,7 @@ private: void resolveWindowNodeList(QueryTreeNodePtr & window_node_list, IdentifierResolveScope & scope); - NamesAndTypes resolveProjectionExpressionNodeList(QueryTreeNodePtr & projection_node_list, IdentifierResolveScope & scope); + NamesAndTypes resolveProjectionExpressionNodeList(QueryTreeNodePtr & projection_node_list, IdentifierResolveScope & scope, const StoragePtr & storage = nullptr); void initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, IdentifierResolveScope & scope); @@ -1316,7 +1316,7 @@ private: void resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, IdentifierResolveScope & scope, QueryExpressionsAliasVisitor & expressions_visitor); - void resolveQuery(const QueryTreeNodePtr & query_node, IdentifierResolveScope & scope); + void resolveQuery(const QueryTreeNodePtr & query_node, IdentifierResolveScope & scope, const StoragePtr & storage = nullptr); void resolveUnion(const QueryTreeNodePtr & union_node, IdentifierResolveScope & scope); @@ -1834,7 +1834,7 @@ QueryTreeNodePtr QueryAnalyzer::tryGetLambdaFromSQLUserDefinedFunctions(const st } /// Evaluate scalar subquery and perform constant folding if scalar subquery does not have constant value -void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, IdentifierResolveScope & scope) +void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, IdentifierResolveScope & scope, const StoragePtr & storage) { auto * query_node = node->as(); auto * union_node = node->as(); @@ -1870,7 +1870,7 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden subquery_context->setSettings(subquery_settings); auto options = SelectQueryOptions(QueryProcessingStage::Complete, scope.subquery_depth, true /*is_subquery*/); - auto interpreter = std::make_unique(node->toAST(), subquery_context, options); + auto interpreter = std::make_unique(node->toAST(), subquery_context, storage, options); auto io = interpreter->execute(); @@ -5230,7 +5230,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi * * 4. If node has alias, update its value in scope alias map. Deregister alias from expression_aliases_in_resolve_process. 
*/ -ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression) +ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, const StoragePtr & storage) { checkStackSize(); @@ -5487,7 +5487,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id resolveUnion(node, subquery_scope); if (!allow_table_expression) - evaluateScalarSubqueryIfNeeded(node, subquery_scope); + evaluateScalarSubqueryIfNeeded(node, subquery_scope, storage); if (result_projection_names.empty()) result_projection_names.push_back(std::move(projection_name)); @@ -5572,7 +5572,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id * Example: CREATE TABLE test_table (id UInt64, value UInt64) ENGINE=TinyLog; SELECT plus(*) FROM test_table; * Example: SELECT *** FROM system.one; */ -ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression) +ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, const StoragePtr & storage) { auto & node_list_typed = node_list->as(); size_t node_list_size = node_list_typed.getNodes().size(); @@ -5585,7 +5585,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node for (auto & node : node_list_typed.getNodes()) { auto node_to_resolve = node; - auto expression_node_projection_names = resolveExpressionNode(node_to_resolve, scope, allow_lambda_expression, allow_table_expression); + auto expression_node_projection_names = resolveExpressionNode(node_to_resolve, scope, allow_lambda_expression, allow_table_expression, storage); size_t expected_projection_names_size = 1; if (auto * expression_list = node_to_resolve->as()) @@ -5772,9 +5772,9 @@ void QueryAnalyzer::resolveWindowNodeList(QueryTreeNodePtr & window_node_list, I resolveWindow(node, scope); } -NamesAndTypes QueryAnalyzer::resolveProjectionExpressionNodeList(QueryTreeNodePtr & projection_node_list, IdentifierResolveScope & scope) +NamesAndTypes QueryAnalyzer::resolveProjectionExpressionNodeList(QueryTreeNodePtr & projection_node_list, IdentifierResolveScope & scope, const StoragePtr & storage) { - ProjectionNames projection_names = resolveExpressionNodeList(projection_node_list, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + ProjectionNames projection_names = resolveExpressionNodeList(projection_node_list, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/, storage); auto projection_nodes = projection_node_list->as().getNodes(); size_t projection_nodes_size = projection_nodes.size(); @@ -6681,7 +6681,7 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, * 10. Remove aliases from expression and lambda nodes. * 11. Resolve query tree node with projection columns. 
*/ -void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, IdentifierResolveScope & scope) +void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, IdentifierResolveScope & scope, const StoragePtr & storage) { size_t max_subquery_depth = scope.context->getSettingsRef().max_subquery_depth; if (max_subquery_depth && scope.subquery_depth > max_subquery_depth) @@ -6838,7 +6838,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (!scope.group_by_use_nulls) { - projection_columns = resolveProjectionExpressionNodeList(query_node_typed.getProjectionNode(), scope); + projection_columns = resolveProjectionExpressionNodeList(query_node_typed.getProjectionNode(), scope, storage); if (query_node_typed.getProjection().getNodes().empty()) throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED, "Empty list of columns in projection. In scope {}", @@ -7077,7 +7077,7 @@ QueryAnalysisPass::QueryAnalysisPass(QueryTreeNodePtr table_expression_) void QueryAnalysisPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { QueryAnalyzer analyzer; - analyzer.resolve(query_tree_node, table_expression, context); + analyzer.resolve(query_tree_node, table_expression, context, storage); } } diff --git a/src/Analyzer/Passes/QueryAnalysisPass.h b/src/Analyzer/Passes/QueryAnalysisPass.h index fa8778ebf76..e0b9612f9e2 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.h +++ b/src/Analyzer/Passes/QueryAnalysisPass.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -71,7 +72,7 @@ public: /** Construct query analysis pass for query or union analysis. * Available columns are extracted from query node join tree. */ - QueryAnalysisPass() = default; + explicit QueryAnalysisPass(const StoragePtr & storage_ = nullptr) : storage(storage_) {} /** Construct query analysis pass for expression or list of expressions analysis. * Available expression columns are extracted from table expression. 
@@ -93,6 +94,7 @@ public: private: QueryTreeNodePtr table_expression; + const StoragePtr storage = nullptr; }; } diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index a6da2a66615..7178c994490 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -238,9 +238,9 @@ void QueryTreePassManager::dump(WriteBuffer & buffer, size_t up_to_pass_index) } } -void addQueryTreePasses(QueryTreePassManager & manager) +void addQueryTreePasses(QueryTreePassManager & manager, const StoragePtr & storage) { - manager.addPass(std::make_unique()); + manager.addPass(std::make_unique(storage)); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); diff --git a/src/Analyzer/QueryTreePassManager.h b/src/Analyzer/QueryTreePassManager.h index 3c67fc36178..478f04440d0 100644 --- a/src/Analyzer/QueryTreePassManager.h +++ b/src/Analyzer/QueryTreePassManager.h @@ -3,6 +3,7 @@ #include #include +#include namespace DB { @@ -44,6 +45,6 @@ private: std::vector passes; }; -void addQueryTreePasses(QueryTreePassManager & manager); +void addQueryTreePasses(QueryTreePassManager & manager, const StoragePtr & storage = nullptr); } diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index 98f70c25dcd..6f9e28deb10 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -109,6 +109,9 @@ void replaceStorageInQueryTree(QueryTreeNodePtr & query_tree, const ContextPtr & } } + if (auto * table_node = table_expression_to_replace->as(); table_node && table_node->getStorageID().getFullNameNotQuoted() != storage->getStorageID().getFullTableName()) + return; + auto replacement_table_expression = std::make_shared(storage, context); std::optional table_expression_modifiers; @@ -133,7 +136,7 @@ QueryTreeNodePtr buildQueryTreeAndRunPasses(const ASTPtr & query, auto query_tree = buildQueryTree(query, context); QueryTreePassManager query_tree_pass_manager(context); - addQueryTreePasses(query_tree_pass_manager); + addQueryTreePasses(query_tree_pass_manager, storage); if (select_query_options.ignore_ast_optimizations) query_tree_pass_manager.run(query_tree, 1 /*up_to_pass_index*/); From 4efa82abf406056ff53b5205e21bf21e8de8f99c Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Wed, 14 Jun 2023 09:54:43 -0400 Subject: [PATCH 0021/1097] remove 01268_mv_scalars --- tests/broken_tests.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/broken_tests.txt b/tests/broken_tests.txt index faee1c5b295..38a51194279 100644 --- a/tests/broken_tests.txt +++ b/tests/broken_tests.txt @@ -33,7 +33,6 @@ 01232_extremes 01244_optimize_distributed_group_by_sharding_key 01247_optimize_distributed_group_by_sharding_key_dist_on_dist -01268_mv_scalars 01268_shard_avgweighted 01270_optimize_skip_unused_shards_low_cardinality 01319_optimize_skip_unused_shards_nesting From 3f5682a792699980cfce10b7f6c510ee68d0419d Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 14 Jun 2023 21:34:43 +0000 Subject: [PATCH 0022/1097] use view_source from context --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 36 +++++++++---------- src/Analyzer/Passes/QueryAnalysisPass.h | 4 +-- src/Analyzer/QueryTreePassManager.cpp | 4 +-- src/Analyzer/QueryTreePassManager.h | 3 +- .../InterpreterSelectQueryAnalyzer.cpp | 2 +- 5 files changed, 23 insertions(+), 26 deletions(-) diff --git 
a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 4ef01524ac8..6e24125880e 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1056,7 +1056,7 @@ private: class QueryAnalyzer { public: - void resolve(QueryTreeNodePtr node, const QueryTreeNodePtr & table_expression, ContextPtr context, const StoragePtr & storage = nullptr) + void resolve(QueryTreeNodePtr node, const QueryTreeNodePtr & table_expression, ContextPtr context) { IdentifierResolveScope scope(node, nullptr /*parent_scope*/); @@ -1073,7 +1073,7 @@ public: throw Exception(ErrorCodes::LOGICAL_ERROR, "For query analysis table expression must be empty"); - resolveQuery(node, scope, storage); + resolveQuery(node, scope); break; } case QueryTreeNodeType::UNION: @@ -1187,7 +1187,7 @@ private: QueryTreeNodePtr tryGetLambdaFromSQLUserDefinedFunctions(const std::string & function_name, ContextPtr context); - void evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & query_tree_node, IdentifierResolveScope & scope, const StoragePtr & storage = nullptr); + void evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & query_tree_node, IdentifierResolveScope & scope); static void mergeWindowWithParentWindow(const QueryTreeNodePtr & window_node, const QueryTreeNodePtr & parent_window_node, IdentifierResolveScope & scope); @@ -1292,9 +1292,9 @@ private: ProjectionNames resolveFunction(QueryTreeNodePtr & function_node, IdentifierResolveScope & scope); - ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, const StoragePtr & storage = nullptr); + ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression); - ProjectionNames resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, const StoragePtr & storage = nullptr); + ProjectionNames resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression); ProjectionNames resolveSortNodeList(QueryTreeNodePtr & sort_node_list, IdentifierResolveScope & scope); @@ -1302,7 +1302,7 @@ private: void resolveWindowNodeList(QueryTreeNodePtr & window_node_list, IdentifierResolveScope & scope); - NamesAndTypes resolveProjectionExpressionNodeList(QueryTreeNodePtr & projection_node_list, IdentifierResolveScope & scope, const StoragePtr & storage = nullptr); + NamesAndTypes resolveProjectionExpressionNodeList(QueryTreeNodePtr & projection_node_list, IdentifierResolveScope & scope); void initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, IdentifierResolveScope & scope); @@ -1316,7 +1316,7 @@ private: void resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, IdentifierResolveScope & scope, QueryExpressionsAliasVisitor & expressions_visitor); - void resolveQuery(const QueryTreeNodePtr & query_node, IdentifierResolveScope & scope, const StoragePtr & storage = nullptr); + void resolveQuery(const QueryTreeNodePtr & query_node, IdentifierResolveScope & scope); void resolveUnion(const QueryTreeNodePtr & union_node, IdentifierResolveScope & scope); @@ -1834,7 +1834,7 @@ QueryTreeNodePtr QueryAnalyzer::tryGetLambdaFromSQLUserDefinedFunctions(const st } /// Evaluate scalar subquery and perform constant folding if scalar subquery does not have constant value -void 
QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, IdentifierResolveScope & scope, const StoragePtr & storage) +void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, IdentifierResolveScope & scope) { auto * query_node = node->as(); auto * union_node = node->as(); @@ -1870,7 +1870,7 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden subquery_context->setSettings(subquery_settings); auto options = SelectQueryOptions(QueryProcessingStage::Complete, scope.subquery_depth, true /*is_subquery*/); - auto interpreter = std::make_unique(node->toAST(), subquery_context, storage, options); + auto interpreter = std::make_unique(node->toAST(), subquery_context, subquery_context->getViewSource(), options); auto io = interpreter->execute(); @@ -5230,7 +5230,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi * * 4. If node has alias, update its value in scope alias map. Deregister alias from expression_aliases_in_resolve_process. */ -ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, const StoragePtr & storage) +ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression) { checkStackSize(); @@ -5487,7 +5487,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id resolveUnion(node, subquery_scope); if (!allow_table_expression) - evaluateScalarSubqueryIfNeeded(node, subquery_scope, storage); + evaluateScalarSubqueryIfNeeded(node, subquery_scope); if (result_projection_names.empty()) result_projection_names.push_back(std::move(projection_name)); @@ -5572,7 +5572,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id * Example: CREATE TABLE test_table (id UInt64, value UInt64) ENGINE=TinyLog; SELECT plus(*) FROM test_table; * Example: SELECT *** FROM system.one; */ -ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, const StoragePtr & storage) +ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression) { auto & node_list_typed = node_list->as(); size_t node_list_size = node_list_typed.getNodes().size(); @@ -5585,7 +5585,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node for (auto & node : node_list_typed.getNodes()) { auto node_to_resolve = node; - auto expression_node_projection_names = resolveExpressionNode(node_to_resolve, scope, allow_lambda_expression, allow_table_expression, storage); + auto expression_node_projection_names = resolveExpressionNode(node_to_resolve, scope, allow_lambda_expression, allow_table_expression); size_t expected_projection_names_size = 1; if (auto * expression_list = node_to_resolve->as()) @@ -5772,9 +5772,9 @@ void QueryAnalyzer::resolveWindowNodeList(QueryTreeNodePtr & window_node_list, I resolveWindow(node, scope); } -NamesAndTypes QueryAnalyzer::resolveProjectionExpressionNodeList(QueryTreeNodePtr & projection_node_list, IdentifierResolveScope & scope, const StoragePtr & storage) +NamesAndTypes QueryAnalyzer::resolveProjectionExpressionNodeList(QueryTreeNodePtr & projection_node_list, IdentifierResolveScope & 
scope) { - ProjectionNames projection_names = resolveExpressionNodeList(projection_node_list, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/, storage); + ProjectionNames projection_names = resolveExpressionNodeList(projection_node_list, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); auto projection_nodes = projection_node_list->as().getNodes(); size_t projection_nodes_size = projection_nodes.size(); @@ -6681,7 +6681,7 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, * 10. Remove aliases from expression and lambda nodes. * 11. Resolve query tree node with projection columns. */ -void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, IdentifierResolveScope & scope, const StoragePtr & storage) +void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, IdentifierResolveScope & scope) { size_t max_subquery_depth = scope.context->getSettingsRef().max_subquery_depth; if (max_subquery_depth && scope.subquery_depth > max_subquery_depth) @@ -6838,7 +6838,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (!scope.group_by_use_nulls) { - projection_columns = resolveProjectionExpressionNodeList(query_node_typed.getProjectionNode(), scope, storage); + projection_columns = resolveProjectionExpressionNodeList(query_node_typed.getProjectionNode(), scope); if (query_node_typed.getProjection().getNodes().empty()) throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED, "Empty list of columns in projection. In scope {}", @@ -7077,7 +7077,7 @@ QueryAnalysisPass::QueryAnalysisPass(QueryTreeNodePtr table_expression_) void QueryAnalysisPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { QueryAnalyzer analyzer; - analyzer.resolve(query_tree_node, table_expression, context, storage); + analyzer.resolve(query_tree_node, table_expression, context); } } diff --git a/src/Analyzer/Passes/QueryAnalysisPass.h b/src/Analyzer/Passes/QueryAnalysisPass.h index e0b9612f9e2..fa8778ebf76 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.h +++ b/src/Analyzer/Passes/QueryAnalysisPass.h @@ -2,7 +2,6 @@ #include #include -#include namespace DB @@ -72,7 +71,7 @@ public: /** Construct query analysis pass for query or union analysis. * Available columns are extracted from query node join tree. */ - explicit QueryAnalysisPass(const StoragePtr & storage_ = nullptr) : storage(storage_) {} + QueryAnalysisPass() = default; /** Construct query analysis pass for expression or list of expressions analysis. * Available expression columns are extracted from table expression. 
@@ -94,7 +93,6 @@ public: private: QueryTreeNodePtr table_expression; - const StoragePtr storage = nullptr; }; } diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index 7178c994490..a6da2a66615 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -238,9 +238,9 @@ void QueryTreePassManager::dump(WriteBuffer & buffer, size_t up_to_pass_index) } } -void addQueryTreePasses(QueryTreePassManager & manager, const StoragePtr & storage) +void addQueryTreePasses(QueryTreePassManager & manager) { - manager.addPass(std::make_unique(storage)); + manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); diff --git a/src/Analyzer/QueryTreePassManager.h b/src/Analyzer/QueryTreePassManager.h index 478f04440d0..3c67fc36178 100644 --- a/src/Analyzer/QueryTreePassManager.h +++ b/src/Analyzer/QueryTreePassManager.h @@ -3,7 +3,6 @@ #include #include -#include namespace DB { @@ -45,6 +44,6 @@ private: std::vector passes; }; -void addQueryTreePasses(QueryTreePassManager & manager, const StoragePtr & storage = nullptr); +void addQueryTreePasses(QueryTreePassManager & manager); } diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index 6f9e28deb10..9e675f0f25a 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -136,7 +136,7 @@ QueryTreeNodePtr buildQueryTreeAndRunPasses(const ASTPtr & query, auto query_tree = buildQueryTree(query, context); QueryTreePassManager query_tree_pass_manager(context); - addQueryTreePasses(query_tree_pass_manager, storage); + addQueryTreePasses(query_tree_pass_manager); if (select_query_options.ignore_ast_optimizations) query_tree_pass_manager.run(query_tree, 1 /*up_to_pass_index*/); From 8f0aa8781ce37097cef8dd02b2e696b18fd411bb Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 19 Jun 2023 02:14:44 +0000 Subject: [PATCH 0023/1097] replace storage in function 'in' subquery --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 6e24125880e..b7bd606b6cf 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -4723,6 +4723,22 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi } else { + /// Replace storage with values storage of insertion block + if (auto * query_node = in_second_argument->as()) + { + auto table_expression = extractLeftTableExpression(query_node->getJoinTree()); + if (auto * query_table_node = table_expression->as()) + { + if (StoragePtr storage = scope.context->getViewSource(); storage && query_table_node->getStorageID().getFullNameNotQuoted() == storage->getStorageID().getFullTableName()) + { + auto replacement_table_expression = std::make_shared(storage, scope.context); + if (std::optional table_expression_modifiers = query_table_node->getTableExpressionModifiers()) + replacement_table_expression->setTableExpressionModifiers(*table_expression_modifiers); + in_second_argument = in_second_argument->cloneAndReplace(table_expression, std::move(replacement_table_expression)); + } + } + } + resolveExpressionNode(in_second_argument, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/); } } From f5c9d278ad7be8a90d92d66546134575ad54c7e7 Mon Sep 
17 00:00:00 2001 From: Han Fei Date: Wed, 9 Aug 2023 22:57:49 +0200 Subject: [PATCH 0024/1097] use statistic to order prewhere conditions better --- src/Access/Common/AccessType.h | 5 + src/AggregateFunctions/QuantileTDigest.h | 11 + src/CMakeLists.txt | 1 + src/Databases/DatabasesCommon.cpp | 2 + src/Interpreters/InterpreterAlterQuery.cpp | 15 + src/Interpreters/InterpreterCreateQuery.cpp | 17 ++ src/Interpreters/InterpreterCreateQuery.h | 2 + src/Interpreters/InterpreterSelectQuery.cpp | 1 + src/Interpreters/MutationsInterpreter.cpp | 36 ++- src/Interpreters/MutationsInterpreter.h | 5 +- src/Parsers/ASTAlterQuery.cpp | 38 +++ src/Parsers/ASTAlterQuery.h | 9 + src/Parsers/ASTCreateQuery.cpp | 12 + src/Parsers/ASTCreateQuery.h | 3 +- src/Parsers/ASTStatisticDeclaration.cpp | 35 +++ src/Parsers/ASTStatisticDeclaration.h | 26 ++ src/Parsers/ParserAlterQuery.cpp | 70 +++++ src/Parsers/ParserCreateQuery.cpp | 46 +++ src/Parsers/ParserCreateQuery.h | 12 + .../Optimizations/optimizePrewhere.cpp | 1 + src/Storages/AlterCommands.cpp | 100 +++++++ src/Storages/AlterCommands.h | 6 + src/Storages/ColumnDependency.h | 5 +- src/Storages/IStorage.h | 3 + src/Storages/MergeTree/DataPartsExchange.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 2 + src/Storages/MergeTree/MergeTask.cpp | 5 +- src/Storages/MergeTree/MergeTreeData.cpp | 4 +- .../MergeTree/MergeTreeDataPartCompact.cpp | 3 +- .../MergeTree/MergeTreeDataPartCompact.h | 1 + .../MergeTree/MergeTreeDataPartInMemory.cpp | 5 +- .../MergeTree/MergeTreeDataPartInMemory.h | 1 + .../MergeTree/MergeTreeDataPartWide.cpp | 3 +- .../MergeTree/MergeTreeDataPartWide.h | 1 + .../MergeTreeDataPartWriterCompact.cpp | 6 +- .../MergeTreeDataPartWriterCompact.h | 1 + .../MergeTreeDataPartWriterOnDisk.cpp | 126 +++++++-- .../MergeTree/MergeTreeDataPartWriterOnDisk.h | 30 +- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 14 +- .../MergeTree/MergeTreeDataPartWriterWide.h | 1 + .../MergeTree/MergeTreeDataWriter.cpp | 2 + .../MergeTree/MergeTreeWhereOptimizer.cpp | 131 ++++----- .../MergeTree/MergeTreeWhereOptimizer.h | 10 +- .../MergeTree/MergeTreeWriteAheadLog.cpp | 2 +- .../MergeTree/MergedBlockOutputStream.cpp | 3 +- .../MergeTree/MergedBlockOutputStream.h | 2 + .../MergedColumnOnlyOutputStream.cpp | 2 + .../MergeTree/MergedColumnOnlyOutputStream.h | 2 + src/Storages/MergeTree/MutateTask.cpp | 64 ++++- .../MergeTree/registerStorageMergeTree.cpp | 5 + src/Storages/MutationCommands.cpp | 11 + src/Storages/MutationCommands.h | 5 +- src/Storages/Statistic/Statistic.cpp | 155 +++++++++++ src/Storages/Statistic/Statistic.h | 262 ++++++++++++++++++ src/Storages/StatisticsDescription.cpp | 120 ++++++++ src/Storages/StatisticsDescription.h | 46 +++ src/Storages/StorageInMemoryMetadata.cpp | 13 + src/Storages/StorageInMemoryMetadata.h | 8 + 58 files changed, 1391 insertions(+), 118 deletions(-) create mode 100644 src/Parsers/ASTStatisticDeclaration.cpp create mode 100644 src/Parsers/ASTStatisticDeclaration.h create mode 100644 src/Storages/Statistic/Statistic.cpp create mode 100644 src/Storages/Statistic/Statistic.h create mode 100644 src/Storages/StatisticsDescription.cpp create mode 100644 src/Storages/StatisticsDescription.h diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index f65a77c1d6a..dac3f813dd6 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -51,6 +51,11 @@ enum class AccessType M(ALTER_CLEAR_INDEX, "CLEAR INDEX", TABLE, ALTER_INDEX) \ M(ALTER_INDEX, "INDEX", GROUP, ALTER_TABLE) /* allows to 
execute ALTER ORDER BY or ALTER {ADD|DROP...} INDEX */\ \ + M(ALTER_ADD_STATISTIC, "ALTER ADD STATISTIC", TABLE, ALTER_STATISTIC) \ + M(ALTER_DROP_STATISTIC, "ALTER DROP STATISTIC", TABLE, ALTER_STATISTIC) \ + M(ALTER_MATERIALIZE_STATISTIC, "ALTER MATERIALIZE STATISTIC", TABLE, ALTER_STATISTIC) \ + M(ALTER_STATISTIC, "STATISTIC", GROUP, ALTER_TABLE) /* allows to execute ALTER ORDER BY or ALTER {ADD|DROP...} INDEX */\ + \ M(ALTER_ADD_PROJECTION, "ADD PROJECTION", TABLE, ALTER_PROJECTION) \ M(ALTER_DROP_PROJECTION, "DROP PROJECTION", TABLE, ALTER_PROJECTION) \ M(ALTER_MATERIALIZE_PROJECTION, "MATERIALIZE PROJECTION", TABLE, ALTER_PROJECTION) \ diff --git a/src/AggregateFunctions/QuantileTDigest.h b/src/AggregateFunctions/QuantileTDigest.h index 915f6763e52..8706f77c12d 100644 --- a/src/AggregateFunctions/QuantileTDigest.h +++ b/src/AggregateFunctions/QuantileTDigest.h @@ -43,6 +43,7 @@ namespace ErrorCodes template class QuantileTDigest { + friend class TDigestStatistic; using Value = Float32; using Count = Float32; using BetterFloat = Float64; // For intermediate results and sum(Count). Must have better precision, than Count @@ -334,6 +335,16 @@ public: compress(); // Allows reading/writing TDigests with different epsilon/max_centroids params } + Float64 getCountLessThan(Float64 value) const + { + + ///Count sum = 0; + ///Value prev_mean = centroids.front().mean; + ///Count prev_count = centroids.front().count; + + return value; + } + /** Calculates the quantile q [0, 1] based on the digest. * For an empty digest returns NaN. */ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ca428fbff3a..df49992595d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -247,6 +247,7 @@ add_object_library(clickhouse_storages Storages) add_object_library(clickhouse_storages_mysql Storages/MySQL) add_object_library(clickhouse_storages_distributed Storages/Distributed) add_object_library(clickhouse_storages_mergetree Storages/MergeTree) +add_object_library(clickhouse_storages_statistic Storages/Statistic) add_object_library(clickhouse_storages_liveview Storages/LiveView) add_object_library(clickhouse_storages_windowview Storages/WindowView) add_object_library(clickhouse_client Client) diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index bb98e2bd3bb..de27c4fd8e7 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -46,11 +46,13 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo { ASTPtr new_columns = InterpreterCreateQuery::formatColumns(metadata.columns); ASTPtr new_indices = InterpreterCreateQuery::formatIndices(metadata.secondary_indices); + ASTPtr new_statistics = InterpreterCreateQuery::formatStatistics(metadata.statistics); ASTPtr new_constraints = InterpreterCreateQuery::formatConstraints(metadata.constraints); ASTPtr new_projections = InterpreterCreateQuery::formatProjections(metadata.projections); ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns); ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->indices, new_indices); + ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->stats, new_statistics); ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->constraints, new_constraints); ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->projections, new_projections); } diff --git a/src/Interpreters/InterpreterAlterQuery.cpp 
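[Editor's sketch] The getCountLessThan() stub added to QuantileTDigest.h above currently just echoes its argument, with the intended centroid walk left commented out. As a rough illustration of what such a CDF query over a t-digest typically computes, here is a minimal, self-contained C++ sketch; it is not ClickHouse code, Centroid and countLessThan are hypothetical stand-ins, and it simply attributes half of each centroid's weight to either side of its mean and interpolates between neighbouring centroids.

#include <iostream>
#include <vector>

struct Centroid
{
    double mean;
    double count;
};

/// Estimated number of ingested values less than `value`,
/// given centroids sorted by mean.
double countLessThan(const std::vector<Centroid> & centroids, double value)
{
    if (centroids.empty() || value < centroids.front().mean)
        return 0;

    double total = 0;
    for (const auto & c : centroids)
        total += c.count;
    if (value >= centroids.back().mean)
        return total;

    double sum = 0;                                /// weight fully to the left of `prev`
    double prev_mean = centroids.front().mean;
    double prev_count = centroids.front().count;

    for (size_t i = 1; i < centroids.size(); ++i)
    {
        const auto & c = centroids[i];
        if (value < c.mean)
        {
            /// Half of each centroid's weight sits on each side of its mean;
            /// interpolate the mass lying between the two neighbouring centroids.
            double fraction = (value - prev_mean) / (c.mean - prev_mean);
            return sum + prev_count / 2 + (prev_count / 2 + c.count / 2) * fraction;
        }
        sum += prev_count;
        prev_mean = c.mean;
        prev_count = c.count;
    }
    return sum;                                    /// not reached: value < back().mean
}

int main()
{
    std::vector<Centroid> digest = {{1.0, 10}, {5.0, 20}, {9.0, 10}};
    std::cout << countLessThan(digest, 5.0) << " of 40 values below 5\n";   /// ~20
}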
b/src/Interpreters/InterpreterAlterQuery.cpp index e82415f1aca..7f30ee9337d 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -287,6 +287,21 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS required_access.emplace_back(AccessType::ALTER_SAMPLE_BY, database, table); break; } + case ASTAlterCommand::ADD_STATISTIC: + { + required_access.emplace_back(AccessType::ALTER_ADD_STATISTIC, database, table); + break; + } + case ASTAlterCommand::DROP_STATISTIC: + { + required_access.emplace_back(AccessType::ALTER_DROP_STATISTIC, database, table); + break; + } + case ASTAlterCommand::MATERIALIZE_STATISTIC: + { + required_access.emplace_back(AccessType::ALTER_MATERIALIZE_STATISTIC, database, table); + break; + } case ASTAlterCommand::ADD_INDEX: { required_access.emplace_back(AccessType::ALTER_ADD_INDEX, database, table); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index d0bb3dd389f..5c0b58eddc8 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -455,6 +455,16 @@ ASTPtr InterpreterCreateQuery::formatIndices(const IndicesDescription & indices) return res; } +ASTPtr InterpreterCreateQuery::formatStatistics(const StatisticsDescriptions & statistics) +{ + auto res = std::make_shared(); + + for (const auto & statistic : statistics) + res->children.push_back(statistic.definition_ast->clone()); + + return res; +} + ASTPtr InterpreterCreateQuery::formatConstraints(const ConstraintsDescription & constraints) { auto res = std::make_shared(); @@ -706,6 +716,11 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti properties.indices.push_back(index_desc); } + if (create.columns_list->stats) + for (const auto & statistic : create.columns_list->stats->children) + properties.stats.push_back( + StatisticDescription::getStatisticFromAST(statistic->clone(), properties.columns, getContext())); + if (create.columns_list->projections) for (const auto & projection_ast : create.columns_list->projections->children) { @@ -791,11 +806,13 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti ASTPtr new_columns = formatColumns(properties.columns); ASTPtr new_indices = formatIndices(properties.indices); + ASTPtr new_statistics = formatStatistics(properties.stats); ASTPtr new_constraints = formatConstraints(properties.constraints); ASTPtr new_projections = formatProjections(properties.projections); create.columns_list->setOrReplace(create.columns_list->columns, new_columns); create.columns_list->setOrReplace(create.columns_list->indices, new_indices); + create.columns_list->setOrReplace(create.columns_list->stats, new_statistics); create.columns_list->setOrReplace(create.columns_list->constraints, new_constraints); create.columns_list->setOrReplace(create.columns_list->projections, new_projections); diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index a5fa6576091..88eba3e0d79 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -38,6 +38,7 @@ public: static ASTPtr formatColumns(const NamesAndTypesList & columns, const NamesAndAliases & alias_columns); static ASTPtr formatColumns(const ColumnsDescription & columns); static ASTPtr formatIndices(const IndicesDescription & indices); + static ASTPtr formatStatistics(const StatisticsDescriptions & statistics); static 
ASTPtr formatConstraints(const ConstraintsDescription & constraints); static ASTPtr formatProjections(const ProjectionsDescription & projections); @@ -80,6 +81,7 @@ private: { ColumnsDescription columns; IndicesDescription indices; + StatisticsDescriptions stats; ConstraintsDescription constraints; ProjectionsDescription projections; }; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 6ea15312ec4..be8443d56a7 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -687,6 +687,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( MergeTreeWhereOptimizer where_optimizer{ std::move(column_compressed_sizes), metadata_snapshot, + storage->getConditionEstimatorByPredicate(query_info, context), queried_columns, supported_prewhere_columns, log}; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 25c52ad8925..22105f063fa 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -479,6 +479,7 @@ void MutationsInterpreter::prepare(bool dry_run) /// TODO Should we get columns, indices and projections from the part itself? Table metadata may be different const ColumnsDescription & columns_desc = metadata_snapshot->getColumns(); const IndicesDescription & indices_desc = metadata_snapshot->getSecondaryIndices(); + const StatisticsDescriptions & statistics_desc = metadata_snapshot->getStatistics(); const ProjectionsDescription & projections_desc = metadata_snapshot->getProjections(); auto storage_snapshot = std::make_shared(*source.getStorage(), metadata_snapshot); @@ -682,7 +683,7 @@ void MutationsInterpreter::prepare(bool dry_run) } else if (command.type == MutationCommand::MATERIALIZE_INDEX) { - mutation_kind.set(MutationKind::MUTATE_INDEX_PROJECTION); + mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); auto it = std::find_if( std::cbegin(indices_desc), std::end(indices_desc), [&](const IndexDescription & index) @@ -703,9 +704,25 @@ void MutationsInterpreter::prepare(bool dry_run) materialized_indices.emplace(command.index_name); } } + else if (command.type == MutationCommand::MATERIALIZE_STATISTIC) + { + mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); + auto it = std::find_if( + std::cbegin(statistics_desc), std::end(statistics_desc), + [&](const StatisticDescription & statistic) + { + return statistic.name == command.statistic_name; + }); + if (it == std::cend(statistics_desc)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown statistic: {}", command.statistic_name); + + for (const auto & column : it->column_names) + dependencies.emplace(column, ColumnDependency::STATISTIC); + materialized_statistics.emplace(command.statistic_name); + } else if (command.type == MutationCommand::MATERIALIZE_PROJECTION) { - mutation_kind.set(MutationKind::MUTATE_INDEX_PROJECTION); + mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); const auto & projection = projections_desc.get(command.projection_name); if (!source.hasIndexOrProjection(projection.getDirectoryName())) { @@ -716,12 +733,17 @@ void MutationsInterpreter::prepare(bool dry_run) } else if (command.type == MutationCommand::DROP_INDEX) { - mutation_kind.set(MutationKind::MUTATE_INDEX_PROJECTION); + mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); materialized_indices.erase(command.index_name); } + else if (command.type == MutationCommand::DROP_STATISTIC) + { + 
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); + materialized_statistics.erase(command.statistic_name); + } else if (command.type == MutationCommand::DROP_PROJECTION) { - mutation_kind.set(MutationKind::MUTATE_INDEX_PROJECTION); + mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); materialized_projections.erase(command.projection_name); } else if (command.type == MutationCommand::MATERIALIZE_TTL) @@ -770,7 +792,9 @@ void MutationsInterpreter::prepare(bool dry_run) auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns, true, has_index_or_projection); for (const auto & dependency : new_dependencies) { - if (dependency.kind == ColumnDependency::SKIP_INDEX || dependency.kind == ColumnDependency::PROJECTION) + if (dependency.kind == ColumnDependency::SKIP_INDEX + || dependency.kind == ColumnDependency::PROJECTION + || dependency.kind == ColumnDependency::STATISTIC) dependencies.insert(dependency); } } @@ -1288,7 +1312,7 @@ QueryPipelineBuilder MutationsInterpreter::execute() Block MutationsInterpreter::getUpdatedHeader() const { // If it's an index/projection materialization, we don't write any data columns, thus empty header is used - return mutation_kind.mutation_kind == MutationKind::MUTATE_INDEX_PROJECTION ? Block{} : *updated_header; + return mutation_kind.mutation_kind == MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION ? Block{} : *updated_header; } const ColumnDependencies & MutationsInterpreter::getColumnDependencies() const diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index d783b503531..4d95f56ee71 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -91,6 +91,8 @@ public: NameSet grabMaterializedIndices() { return std::move(materialized_indices); } + NameSet grabMaterializedStatistics() { return std::move(materialized_statistics); } + NameSet grabMaterializedProjections() { return std::move(materialized_projections); } struct MutationKind @@ -98,7 +100,7 @@ public: enum MutationKindEnum { MUTATE_UNKNOWN, - MUTATE_INDEX_PROJECTION, + MUTATE_INDEX_STATISTIC_PROJECTION, MUTATE_OTHER, } mutation_kind = MUTATE_UNKNOWN; @@ -212,6 +214,7 @@ private: NameSet materialized_indices; NameSet materialized_projections; + NameSet materialized_statistics; MutationKind mutation_kind; /// Do we meet any index or projection mutation. diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 61e5903fad5..b04c0efa85c 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -123,6 +123,9 @@ const char * ASTAlterCommand::typeToString(ASTAlterCommand::Type type) case LIVE_VIEW_REFRESH: return "LIVE_VIEW_REFRESH"; case MODIFY_DATABASE_SETTING: return "MODIFY_DATABASE_SETTING"; case MODIFY_COMMENT: return "MODIFY_COMMENT"; + case ADD_STATISTIC: return "ADD_STATISTIC"; + case DROP_STATISTIC: return "DROP_STATISTIC"; + case MATERIALIZE_STATISTIC: return "MATERIALIZE_STATISTIC"; } UNREACHABLE(); } @@ -248,6 +251,41 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & partition->formatImpl(settings, state, frame); } } + else if (type == ASTAlterCommand::ADD_STATISTIC) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "ADD STATISTIC " << (if_not_exists ? "IF NOT EXISTS " : "") + << (settings.hilite ? hilite_none : ""); + statistic_decl->formatImpl(settings, state, frame); + + if (first) + settings.ostr << (settings.hilite ? 
hilite_keyword : "") << " FIRST " << (settings.hilite ? hilite_none : ""); + else if (statistic) /// AFTER + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " AFTER " << (settings.hilite ? hilite_none : ""); + statistic->formatImpl(settings, state, frame); + } + } + else if (type == ASTAlterCommand::DROP_STATISTIC) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << (clear_statistic ? "CLEAR " : "DROP ") << "INDEX " + << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); + statistic->formatImpl(settings, state, frame); + if (partition) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + } + } + else if (type == ASTAlterCommand::MATERIALIZE_STATISTIC) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "MATERIALIZE STATISTIC " << (settings.hilite ? hilite_none : ""); + statistic->formatImpl(settings, state, frame); + if (partition) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + } + } else if (type == ASTAlterCommand::ADD_CONSTRAINT) { settings.ostr << (settings.hilite ? hilite_keyword : "") << "ADD CONSTRAINT " << (if_not_exists ? "IF NOT EXISTS " : "") diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index 1400113fa9c..251e8c233e2 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -54,6 +54,10 @@ public: DROP_PROJECTION, MATERIALIZE_PROJECTION, + ADD_STATISTIC, + DROP_STATISTIC, + MATERIALIZE_STATISTIC, + DROP_PARTITION, DROP_DETACHED_PARTITION, ATTACH_PARTITION, @@ -129,6 +133,9 @@ public: */ ASTPtr projection; + ASTPtr statistic_decl; + ASTPtr statistic; + /** Used in DROP PARTITION, ATTACH PARTITION FROM, UPDATE, DELETE queries. * The value or ID of the partition is stored here. 
*/ @@ -167,6 +174,8 @@ public: bool clear_index = false; /// for CLEAR INDEX (do not drop index from metadata) + bool clear_statistic = false; /// for CLEAR STATISTIC (do not drop statistic from metadata) + bool clear_projection = false; /// for CLEAR PROJECTION (do not drop projection from metadata) bool if_not_exists = false; /// option for ADD_COLUMN diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 196681a8801..5c34841e6a7 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -132,6 +132,8 @@ ASTPtr ASTColumns::clone() const res->set(res->columns, columns->clone()); if (indices) res->set(res->indices, indices->clone()); + if (stats) + res->set(res->stats, stats->clone()); if (constraints) res->set(res->constraints, constraints->clone()); if (projections) @@ -166,6 +168,16 @@ void ASTColumns::formatImpl(const FormatSettings & s, FormatState & state, Forma list.children.push_back(elem); } } + if (stats) + { + for (const auto & stat : stats->children) + { + auto elem = std::make_shared(); + elem->prefix = "STATISTIC"; + elem->set(elem->elem, stat->clone()); + list.children.push_back(elem); + } + } if (constraints) { for (const auto & constraint : constraints->children) diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 230996f610e..0c1a139c2eb 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -53,6 +53,7 @@ class ASTColumns : public IAST public: ASTExpressionList * columns = nullptr; ASTExpressionList * indices = nullptr; + ASTExpressionList * stats = nullptr; ASTExpressionList * constraints = nullptr; ASTExpressionList * projections = nullptr; IAST * primary_key = nullptr; @@ -66,7 +67,7 @@ public: bool empty() const { return (!columns || columns->children.empty()) && (!indices || indices->children.empty()) && (!constraints || constraints->children.empty()) - && (!projections || projections->children.empty()); + && (!projections || projections->children.empty()) && (!stats || stats->children.empty()); } void forEachPointerToChild(std::function f) override diff --git a/src/Parsers/ASTStatisticDeclaration.cpp b/src/Parsers/ASTStatisticDeclaration.cpp new file mode 100644 index 00000000000..53b20b167b7 --- /dev/null +++ b/src/Parsers/ASTStatisticDeclaration.cpp @@ -0,0 +1,35 @@ +#include + +#include +#include +#include + + +namespace DB +{ + +ASTPtr ASTStatisticDeclaration::clone() const +{ + auto res = std::make_shared(); + + res->name = name; + + if (columns) + res->set(res->columns, columns->clone()); + if (type) + res->set(res->type, type->clone()); + return std::move(res); +} + + +void ASTStatisticDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const +{ + s.ostr << backQuoteIfNeed(name); + s.ostr << " "; + columns->formatImpl(s, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : ""); + type->formatImpl(s, state, frame); +} + +} + diff --git a/src/Parsers/ASTStatisticDeclaration.h b/src/Parsers/ASTStatisticDeclaration.h new file mode 100644 index 00000000000..0d5ab7723e9 --- /dev/null +++ b/src/Parsers/ASTStatisticDeclaration.h @@ -0,0 +1,26 @@ +#pragma once + +#include + +namespace DB +{ + +class ASTFunction; + +/** name BY columns TYPE typename(args) in create query + */ +class ASTStatisticDeclaration : public IAST +{ +public: + String name; + IAST * columns; + ASTFunction * type; + + /** Get the text that identifies this element. 
*/ + String getID(char) const override { return "Stat"; } + + ASTPtr clone() const override; + void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; +}; + +} diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index 8292b52f092..bb94d98d587 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -44,6 +44,11 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_clear_index("CLEAR INDEX"); ParserKeyword s_materialize_index("MATERIALIZE INDEX"); + ParserKeyword s_add_statistic("ADD STATISTIC"); + ParserKeyword s_drop_statistic("DROP STATISTIC"); + ParserKeyword s_clear_statistic("CLEAR STATISTIC"); + ParserKeyword s_materialize_statistic("MATERIALIZE STATISTIC"); + ParserKeyword s_add_constraint("ADD CONSTRAINT"); ParserKeyword s_drop_constraint("DROP CONSTRAINT"); @@ -112,6 +117,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserIdentifier parser_remove_property; ParserCompoundColumnDeclaration parser_col_decl; ParserIndexDeclaration parser_idx_decl; + ParserStatisticDeclaration parser_stat_decl; ParserConstraintDeclaration parser_constraint_decl; ParserProjectionDeclaration parser_projection_decl; ParserCompoundColumnDeclaration parser_modify_col_decl(false, false, true); @@ -327,6 +333,70 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; } } + else if (s_add_statistic.ignore(pos, expected)) + { + if (s_if_not_exists.ignore(pos, expected)) + command->if_not_exists = true; + + if (!parser_stat_decl.parse(pos, command->statistic_decl, expected)) + return false; + + command->type = ASTAlterCommand::ADD_STATISTIC; + + if (s_first.ignore(pos, expected)) + command->first = true; + else if (s_after.ignore(pos, expected)) + { + if (!parser_name.parse(pos, command->statistic, expected)) + return false; + } + } + else if (s_drop_statistic.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->statistic, expected)) + return false; + + command->type = ASTAlterCommand::DROP_STATISTIC; + command->detach = false; + } + else if (s_clear_statistic.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->statistic, expected)) + return false; + + command->type = ASTAlterCommand::DROP_STATISTIC; + command->clear_statistic = true; + command->detach = false; + + if (s_in_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + } + } + else if (s_materialize_statistic.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->statistic, expected)) + return false; + + command->type = ASTAlterCommand::MATERIALIZE_STATISTIC; + command->detach = false; + + if (s_in_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + } + } else if (s_add_projection.ignore(pos, expected)) { if (s_if_not_exists.ignore(pos, expected)) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index adf3513ba40..eb79e250a1d 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -157,6 
+158,39 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return true; } +bool ParserStatisticDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_type("TYPE"); + + ParserIdentifier name_p; + ParserDataType data_type_p; + ParserExpression expression_p; + + ASTPtr name; + ASTPtr columns; + ASTPtr type; + + if (!name_p.parse(pos, name, expected)) + return false; + + if (!expression_p.parse(pos, columns, expected)) + return false; + + if (!s_type.ignore(pos, expected)) + return false; + + if (!data_type_p.parse(pos, type, expected)) + return false; + + auto stat = std::make_shared(); + stat->name = name->as().name(); + stat->set(stat->columns, columns); + stat->set(stat->type, type); + node = stat; + + return true; +} + bool ParserConstraintDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_check("CHECK"); @@ -226,11 +260,13 @@ bool ParserProjectionDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & bool ParserTablePropertyDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_index("INDEX"); + ParserKeyword s_stat("STATISTIC"); ParserKeyword s_constraint("CONSTRAINT"); ParserKeyword s_projection("PROJECTION"); ParserKeyword s_primary_key("PRIMARY KEY"); ParserIndexDeclaration index_p; + ParserStatisticDeclaration stat_p; ParserConstraintDeclaration constraint_p; ParserProjectionDeclaration projection_p; ParserColumnDeclaration column_p{true, true}; @@ -248,6 +284,11 @@ bool ParserTablePropertyDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expecte if (!constraint_p.parse(pos, new_node, expected)) return false; } + else if (s_stat.ignore(pos, expected)) + { + if (!stat_p.parse(pos, new_node, expected)) + return false; + } else if (s_projection.ignore(pos, expected)) { if (!projection_p.parse(pos, new_node, expected)) @@ -297,6 +338,7 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr columns = std::make_shared(); ASTPtr indices = std::make_shared(); + ASTPtr stats = std::make_shared(); ASTPtr constraints = std::make_shared(); ASTPtr projections = std::make_shared(); ASTPtr primary_key; @@ -307,6 +349,8 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E columns->children.push_back(elem); else if (elem->as()) indices->children.push_back(elem); + else if (elem->as()) + stats->children.push_back(elem); else if (elem->as()) constraints->children.push_back(elem); else if (elem->as()) @@ -330,6 +374,8 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E res->set(res->columns, columns); if (!indices->children.empty()) res->set(res->indices, indices); + if (!stats->children.empty()) + res->set(res->stats, stats); if (!constraints->children.empty()) res->set(res->constraints, constraints); if (!projections->children.empty()) diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 5f79a4b68f6..4e85e3456f3 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -380,6 +380,18 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +/** name BY columns TYPE typename(arg1, arg2, ...) 
*/ +/** name BY columns */ +class ParserStatisticDeclaration : public IParserBase +{ +public: + ParserStatisticDeclaration() = default; + +protected: + const char * getName() const override { return "statistics declaration"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + class ParserConstraintDeclaration : public IParserBase { protected: diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index ca8a412bf2e..7c542733927 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -158,6 +158,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes) MergeTreeWhereOptimizer where_optimizer{ std::move(column_compressed_sizes), storage_metadata, + storage.getConditionEstimatorByPredicate(read_from_merge_tree->getQueryInfo(), context), queried_columns, storage.supportedPrewhereColumns(), &Poco::Logger::get("QueryPlanOptimizePrewhere")}; diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index a9247f9b898..98a5ae3bb2f 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -232,6 +233,25 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ return command; } + else if (command_ast->type == ASTAlterCommand::ADD_STATISTIC) + { + AlterCommand command; + command.ast = command_ast->clone(); + command.statistic_decl = command_ast->statistic_decl; + command.type = AlterCommand::ADD_STATISTIC; + + const auto & ast_stat_decl = command_ast->statistic_decl->as(); + + command.statistic_name = ast_stat_decl.name; + + if (command_ast->statistic) + command.after_statistic_name = command_ast->statistic->as().name(); + + command.if_not_exists = command_ast->if_not_exists; + command.first = command_ast->first; + + return command; + } else if (command_ast->type == ASTAlterCommand::ADD_CONSTRAINT) { AlterCommand command; @@ -291,6 +311,20 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ return command; } + else if (command_ast->type == ASTAlterCommand::DROP_STATISTIC) + { + AlterCommand command; + command.ast = command_ast->clone(); + command.type = AlterCommand::DROP_STATISTIC; + command.statistic_name = command_ast->statistic->as().name(); + command.if_exists = command_ast->if_exists; + command.clear = command_ast->clear_statistic; + + if (command_ast->partition) + command.partition = command_ast->partition; + + return command; + } else if (command_ast->type == ASTAlterCommand::DROP_PROJECTION) { AlterCommand command; @@ -553,6 +587,68 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) metadata.secondary_indices.erase(erase_it); } } + else if (type == ADD_STATISTIC) + { + if (std::any_of( + metadata.statistics.cbegin(), + metadata.statistics.cend(), + [this](const auto & statistic) + { + return statistic.name == statistic_name; + })) + { + if (if_not_exists) + return; + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add statistic {} : statistic with this name already exists", statistic_name); + } + + auto insert_it = metadata.statistics.end(); + + /// insert the index in the beginning of the indices list + if (first) + insert_it = metadata.statistics.begin(); + + if (!after_statistic_name.empty()) + { + insert_it = std::find_if( + metadata.statistics.begin(), + metadata.statistics.end(), + 
[this](const auto & statistic) + { + return statistic.name == after_statistic_name; + }); + + if (insert_it == metadata.statistics.end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong statistic name. Cannot find statistic {} to insert after", backQuote(after_statistic_name)); + + ++insert_it; + } + + metadata.statistics.emplace(insert_it, StatisticDescription::getStatisticFromAST(statistic_decl, metadata.columns, context)); + } + else if (type == DROP_STATISTIC) + { + if (!partition && !clear) + { + auto erase_it = std::find_if( + metadata.statistics.begin(), + metadata.statistics.end(), + [this](const auto & statistic) + { + return statistic.name == statistic_name; + }); + + if (erase_it == metadata.statistics.end()) + { + if (if_exists) + return; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong statistic name. Cannot find statistic {} to drop", backQuote(statistic_name)); + } + + metadata.statistics.erase(erase_it); + } + } else if (type == ADD_CONSTRAINT) { auto constraints = metadata.constraints.getConstraints(); @@ -877,6 +973,10 @@ std::optional AlterCommand::tryConvertToMutationCommand(Storage result.partition = partition; result.predicate = nullptr; + } + else if (type == DROP_STATISTIC) + { + } else if (type == DROP_PROJECTION) { diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index 3e526dcc0bb..eae538815f0 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -38,6 +38,8 @@ struct AlterCommand DROP_CONSTRAINT, ADD_PROJECTION, DROP_PROJECTION, + ADD_STATISTIC, + DROP_STATISTIC, MODIFY_TTL, MODIFY_SETTING, RESET_SETTING, @@ -118,6 +120,10 @@ struct AlterCommand /// For ADD/DROP PROJECTION String projection_name; + ASTPtr statistic_decl = nullptr; + String after_statistic_name; + String statistic_name; + /// For MODIFY TTL ASTPtr ttl = nullptr; diff --git a/src/Storages/ColumnDependency.h b/src/Storages/ColumnDependency.h index 6c3c96ec62a..b9088dd0227 100644 --- a/src/Storages/ColumnDependency.h +++ b/src/Storages/ColumnDependency.h @@ -24,7 +24,10 @@ struct ColumnDependency TTL_EXPRESSION, /// TTL is set for @column_name. - TTL_TARGET + TTL_TARGET, + + /// Exists any statistic, that requires @column_name + STATISTIC, }; ColumnDependency(const String & column_name_, Kind kind_) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index b262d88db57..9231b0c3286 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -135,6 +136,8 @@ public: /// Returns true if the storage supports queries with the PREWHERE section. virtual bool supportsPrewhere() const { return false; } + virtual ConditionEstimator getConditionEstimatorByPredicate(const SelectQueryInfo &, ContextPtr) const { return {}; } + /// Returns which columns supports PREWHERE, or empty std::nullopt if all columns is supported. /// This is needed for engines whose aggregates data from multiple tables, like Merge. 
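[Editor's sketch] IStorage above gains a virtual getConditionEstimatorByPredicate() that returns an empty estimator by default, so only engines that actually collect statistics provide a meaningful one. A minimal sketch of that pattern, with hypothetical names, a simplified signature, and hard-coded selectivities standing in for values the real code would derive from part statistics:

#include <iostream>
#include <map>
#include <string>

/// Illustrative stand-in: answers "what fraction of rows is expected to pass a
/// condition on this column?", defaulting to 1.0 (no information).
struct ConditionEstimator
{
    std::map<std::string, double> column_selectivity;

    double estimateSelectivity(const std::string & column) const
    {
        auto it = column_selectivity.find(column);
        return it == column_selectivity.end() ? 1.0 : it->second;
    }
};

struct IStorage
{
    /// Default: no statistics, every condition looks equally unselective.
    virtual ConditionEstimator getConditionEstimatorByPredicate() const { return {}; }
    virtual ~IStorage() = default;
};

struct StorageWithStatistics : IStorage
{
    ConditionEstimator getConditionEstimatorByPredicate() const override
    {
        /// A real implementation would build this from per-part statistics.
        return ConditionEstimator{{{"user_id", 0.001}, {"status", 0.3}}};
    }
};

int main()
{
    StorageWithStatistics storage;
    const IStorage & base = storage;
    auto estimator = base.getConditionEstimatorByPredicate();
    std::cout << estimator.estimateSelectivity("user_id") << '\n';   /// 0.001
    std::cout << estimator.estimateSelectivity("other") << '\n';     /// 1 (unknown column)
}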
virtual std::optional supportedPrewhereColumns() const { return std::nullopt; } diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 23bbc1c7f9d..48fb3303445 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -736,7 +736,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( } MergedBlockOutputStream part_out( - new_data_part, metadata_snapshot, block.getNamesAndTypesList(), {}, + new_data_part, metadata_snapshot, block.getNamesAndTypesList(), {}, {}, CompressionCodecFactory::instance().get("NONE", {}), NO_TRANSACTION_PTR); part_out.write(block); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index fd73d802579..2b1cb5fc2de 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -102,6 +103,7 @@ public: const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) = 0; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 8f39c31eae0..c5aa74b3a92 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -1,4 +1,5 @@ -#include "Storages/MergeTree/IDataPartStorage.h" +#include +#include #include #include @@ -365,6 +366,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->metadata_snapshot, global_ctx->merging_columns, MergeTreeIndexFactory::instance().getMany(global_ctx->metadata_snapshot->getSecondaryIndices()), + MergeTreeStatisticFactory::instance().getMany(global_ctx->metadata_snapshot->getStatistics()), ctx->compression_codec, global_ctx->txn, /*reset_columns=*/ true, @@ -580,6 +582,7 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const /// because all of them were already recalculated and written /// as key part of vertical merge std::vector{}, + std::vector{}, /// TODO: think about it &global_ctx->written_offset_columns, global_ctx->to->getIndexGranularity()); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e9c3a7f66ae..31a48d92083 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8458,7 +8458,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createEmptyPart( const auto & index_factory = MergeTreeIndexFactory::instance(); MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, - index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec, txn); + index_factory.getMany(metadata_snapshot->getSecondaryIndices()), + Statistics{}, + compression_codec, txn); bool sync_on_insert = settings->fsync_after_insert; diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 07e20f16a9f..6f5320062b2 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -52,6 +52,7 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter( const NamesAndTypesList & columns_list, const StorageMetadataPtr & 
metadata_snapshot, const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) @@ -66,7 +67,7 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter( return std::make_unique( shared_from_this(), ordered_columns_list, metadata_snapshot, - indices_to_recalc, getMarksFileExtension(), + indices_to_recalc, stats_to_recalc_, getMarksFileExtension(), default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index b115692a7cf..341d464a9da 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -43,6 +43,7 @@ public: const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index 468747a6c36..a23b4395df6 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -52,6 +52,7 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter( const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & /* indices_to_recalc */, + const Statistics & /* stats_to_recalc_ */, const CompressionCodecPtr & /* default_codec */, const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & /* computed_index_granularity */) @@ -92,7 +93,8 @@ MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & auto compression_codec = storage.getContext()->chooseCompressionCodec(0, 0); auto indices = MergeTreeIndexFactory::instance().getMany(metadata_snapshot->getSecondaryIndices()); - MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, indices, compression_codec, NO_TRANSACTION_PTR); + auto stats = MergeTreeStatisticFactory::instance().getMany(metadata_snapshot->getStatistics()); + MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, indices, stats, compression_codec, NO_TRANSACTION_PTR); out.write(block); const auto & projections = metadata_snapshot->getProjections(); @@ -125,6 +127,7 @@ MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & MergedBlockOutputStream projection_out( new_projection_part, desc.metadata, new_projection_part->getColumns(), projection_indices, + {}, projection_compression_codec, NO_TRANSACTION_PTR); projection_out.write(old_projection_part->block); diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index db7244d8e99..e6ef05319c5 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -32,6 +32,7 @@ public: const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, const 
MergeTreeIndexGranularity & computed_index_granularity) override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index f44cbdd8628..20f430bed8f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -50,13 +50,14 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartWide::getWriter( const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) { return std::make_unique( shared_from_this(), columns_list, - metadata_snapshot, indices_to_recalc, + metadata_snapshot, indices_to_recalc, stats_to_recalc_, getMarksFileExtension(), default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 5ee497b9b21..6fc195bbfe7 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -38,6 +38,7 @@ public: const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 5e1da21da5b..5024ff0217d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -21,12 +21,13 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc_, + const Statistics & stats_to_recalc, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) : MergeTreeDataPartWriterOnDisk(data_part_, columns_list_, metadata_snapshot_, - indices_to_recalc_, marks_file_extension_, + indices_to_recalc_, stats_to_recalc, marks_file_extension_, default_codec_, settings_, index_granularity_) , plain_file(data_part_->getDataPartStorage().writeFile( MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION, @@ -176,6 +177,7 @@ void MergeTreeDataPartWriterCompact::write(const Block & block, const IColumn::P auto granules_to_write = getGranulesToWrite(index_granularity, flushed_block.rows(), getCurrentMark(), /* last_block = */ false); writeDataBlockPrimaryIndexAndSkipIndices(flushed_block, granules_to_write); setCurrentMark(getCurrentMark() + granules_to_write.size()); + calculateAndSerializeStatistics(flushed_block); } } @@ -422,6 +424,7 @@ void MergeTreeDataPartWriterCompact::fillChecksums(IMergeTreeDataPart::Checksums fillPrimaryIndexChecksums(checksums); fillSkipIndicesChecksums(checksums); + fillStatisticsChecksums(checksums); } void MergeTreeDataPartWriterCompact::finish(bool sync) @@ -434,6 +437,7 @@ void MergeTreeDataPartWriterCompact::finish(bool sync) finishPrimaryIndexSerialization(sync); finishSkipIndicesSerialization(sync); + finishStatisticsSerialization(sync); } 
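[Editor's sketch] The compact writer above gives statistics the same lifecycle as skip indices: update them on every written block, then serialize them once and account for them in the part checksums. A schematic, std-only sketch of that lifecycle; the types are hypothetical, a trivial min/max statistic stands in for the t-digest, and a plain size map stands in for ClickHouse's hashing/compressing write buffers and checksum structures.

#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

struct ColumnStatistic
{
    std::string name;
    double min = 0, max = 0;
    size_t rows = 0;

    void update(const std::vector<double> & block_column)     /// once per written block
    {
        for (double v : block_column)
        {
            if (rows == 0 || v < min) min = v;
            if (rows == 0 || v > max) max = v;
            ++rows;
        }
    }

    std::string serialize() const                              /// once, at finalize
    {
        std::ostringstream out;
        out << min << ' ' << max << ' ' << rows;
        return out.str();
    }
};

int main()
{
    ColumnStatistic stat{"a_stat"};
    std::map<std::string, size_t> checksums;                   /// file name -> size, as a stand-in

    for (const auto & block : {std::vector<double>{1, 2, 3}, std::vector<double>{10, 20}})
        stat.update(block);                                    /// cf. calculateAndSerializeStatistics()

    std::string payload = stat.serialize();                    /// cf. fillStatisticsChecksums()
    checksums[stat.name + ".stat"] = payload.size();

    std::cout << stat.name << ": " << payload << ", " << checksums[stat.name + ".stat"] << " bytes\n";
}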
} diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 06f8122393f..c5a045c42d0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -15,6 +15,7 @@ public: const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc, const String & marks_file_extension, const CompressionCodecPtr & default_codec, const MergeTreeWriterSettings & settings, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index f57ffa5ee14..c4f037f65c3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -1,8 +1,6 @@ #include #include #include -#include -#include "IO/WriteBufferFromFileDecorator.h" namespace DB { @@ -11,7 +9,8 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -void MergeTreeDataPartWriterOnDisk::Stream::preFinalize() +template +void MergeTreeDataPartWriterOnDisk::Stream::preFinalize() { /// Here the main goal is to do preFinalize calls for plain_file and marks_file /// Before that all hashing and compression buffers have to be finalized @@ -22,36 +21,45 @@ void MergeTreeDataPartWriterOnDisk::Stream::preFinalize() compressor.finalize(); plain_hashing.finalize(); - if (compress_marks) + if constexpr (!only_plain_file) { - marks_compressed_hashing.finalize(); - marks_compressor.finalize(); + if (compress_marks) + { + marks_compressed_hashing.finalize(); + marks_compressor.finalize(); + } + + marks_hashing.finalize(); } - marks_hashing.finalize(); - plain_file->preFinalize(); - marks_file->preFinalize(); + if constexpr (!only_plain_file) + marks_file->preFinalize(); is_prefinalized = true; } -void MergeTreeDataPartWriterOnDisk::Stream::finalize() +template +void MergeTreeDataPartWriterOnDisk::Stream::finalize() { if (!is_prefinalized) preFinalize(); plain_file->finalize(); - marks_file->finalize(); + if constexpr (!only_plain_file) + marks_file->finalize(); } -void MergeTreeDataPartWriterOnDisk::Stream::sync() const +template +void MergeTreeDataPartWriterOnDisk::Stream::sync() const { plain_file->sync(); - marks_file->sync(); + if constexpr (!only_plain_file) + marks_file->sync(); } -MergeTreeDataPartWriterOnDisk::Stream::Stream( +template<> +MergeTreeDataPartWriterOnDisk::Stream::Stream( const String & escaped_column_name_, const MutableDataPartStoragePtr & data_part_storage, const String & data_path_, @@ -78,7 +86,27 @@ MergeTreeDataPartWriterOnDisk::Stream::Stream( { } -void MergeTreeDataPartWriterOnDisk::Stream::addToChecksums(MergeTreeData::DataPart::Checksums & checksums) +template<> +MergeTreeDataPartWriterOnDisk::Stream::Stream( + const String & escaped_column_name_, + const MutableDataPartStoragePtr & data_part_storage, + const String & data_path_, + const std::string & data_file_extension_, + const CompressionCodecPtr & compression_codec_, + size_t max_compress_block_size_, + const WriteSettings & query_write_settings) : + escaped_column_name(escaped_column_name_), + data_file_extension{data_file_extension_}, + plain_file(data_part_storage->writeFile(data_path_ + data_file_extension, max_compress_block_size_, query_write_settings)), + plain_hashing(*plain_file), + compressor(plain_hashing, compression_codec_, max_compress_block_size_), + compressed_hashing(compressor), + 
compress_marks(false) +{ +} + +template +void MergeTreeDataPartWriterOnDisk::Stream::addToChecksums(MergeTreeData::DataPart::Checksums & checksums) { String name = escaped_column_name; @@ -88,15 +116,18 @@ void MergeTreeDataPartWriterOnDisk::Stream::addToChecksums(MergeTreeData::DataPa checksums.files[name + data_file_extension].file_size = plain_hashing.count(); checksums.files[name + data_file_extension].file_hash = plain_hashing.getHash(); - if (compress_marks) + if constexpr (!only_plain_file) { - checksums.files[name + marks_file_extension].is_compressed = true; - checksums.files[name + marks_file_extension].uncompressed_size = marks_compressed_hashing.count(); - checksums.files[name + marks_file_extension].uncompressed_hash = marks_compressed_hashing.getHash(); - } + if (compress_marks) + { + checksums.files[name + marks_file_extension].is_compressed = true; + checksums.files[name + marks_file_extension].uncompressed_size = marks_compressed_hashing.count(); + checksums.files[name + marks_file_extension].uncompressed_hash = marks_compressed_hashing.getHash(); + } - checksums.files[name + marks_file_extension].file_size = marks_hashing.count(); - checksums.files[name + marks_file_extension].file_hash = marks_hashing.getHash(); + checksums.files[name + marks_file_extension].file_size = marks_hashing.count(); + checksums.files[name + marks_file_extension].file_hash = marks_hashing.getHash(); + } } @@ -105,12 +136,14 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeIndices & indices_to_recalc_, + const Statistics & stats_to_recalc_, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) : IMergeTreeDataPartWriter(data_part_, columns_list_, metadata_snapshot_, settings_, index_granularity_) , skip_indices(indices_to_recalc_) + , stats(stats_to_recalc_) , marks_file_extension(marks_file_extension_) , default_codec(default_codec_) , compute_granularity(index_granularity.empty()) @@ -126,6 +159,7 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( if (settings.rewrite_primary_key) initPrimaryIndex(); initSkipIndices(); + initStatistics(); } // Implementation is split into static functions for ability @@ -207,6 +241,20 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() } } +void MergeTreeDataPartWriterOnDisk::initStatistics() +{ + for (const auto & stat_ptr : stats) + { + String stats_name = stat_ptr->getFileName(); + stats_streams.emplace_back(std::make_unique>( + stats_name, + data_part->getDataPartStoragePtr(), + stats_name, STAT_FILE_SUFFIX, + default_codec, settings.max_compress_block_size, + settings.query_write_settings)); + } +} + void MergeTreeDataPartWriterOnDisk::initSkipIndices() { ParserCodec codec_parser; @@ -217,7 +265,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() { String stream_name = skip_index->getFileName(); skip_indices_streams.emplace_back( - std::make_unique( + std::make_unique>( stream_name, data_part->getDataPartStoragePtr(), stream_name, skip_index->getSerializedFileExtension(), @@ -279,6 +327,14 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc last_block_index_columns[j] = primary_index_block.getByPosition(j).column; } +void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block & block) +{ + for (const auto & stat_ptr : stats) + { + 
stat_ptr->update(block); + } +} + void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block & skip_indexes_block, const Granules & granules_to_write) { /// Filling and writing skip indices like in MergeTreeDataPartWriterWide::writeColumn @@ -417,6 +473,27 @@ void MergeTreeDataPartWriterOnDisk::fillSkipIndicesChecksums(MergeTreeData::Data } } +void MergeTreeDataPartWriterOnDisk::finishStatisticsSerialization(bool sync) +{ + for (auto & stream : stats_streams) + { + stream->finalize(); + if (sync) + stream->sync(); + } +} + +void MergeTreeDataPartWriterOnDisk::fillStatisticsChecksums(MergeTreeData::DataPart::Checksums & checksums) +{ + for (size_t i = 0; i < stats.size(); i++) + { + auto & stream = *stats_streams[i]; + stats[i]->serialize(stream.compressed_hashing); + stream.preFinalize(); + stream.addToChecksums(checksums); + } +} + void MergeTreeDataPartWriterOnDisk::finishSkipIndicesSerialization(bool sync) { for (auto & stream : skip_indices_streams) @@ -442,4 +519,7 @@ Names MergeTreeDataPartWriterOnDisk::getSkipIndicesColumns() const return Names(skip_indexes_column_names_set.begin(), skip_indexes_column_names_set.end()); } +template struct MergeTreeDataPartWriterOnDisk::Stream; +template struct MergeTreeDataPartWriterOnDisk::Stream; + } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index b76b74ab717..30d43b9c180 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB { @@ -46,6 +47,7 @@ public: /// Helper class, which holds chain of buffers to write data file with marks. /// It is used to write: one column, skip index or all columns (in compact format). 
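[Editor's sketch] The Stream helper is being templated on whether the stream carries a marks file at all: statistic streams write only a data file, so the marks members are selected away with std::conditional_t and guarded with if constexpr. A stripped-down sketch of that mechanism, with plain std::ofstream standing in for the hashing/compressing buffer chain:

#include <fstream>
#include <string>
#include <type_traits>

struct Empty {};

template <bool only_plain_file>
struct Stream
{
    std::ofstream plain_file;                                               /// data file, always present
    std::conditional_t<!only_plain_file, std::ofstream, Empty> marks_file;  /// compiled out for statistics

    explicit Stream(const std::string & path) : plain_file(path + ".bin")
    {
        if constexpr (!only_plain_file)
            marks_file.open(path + ".mrk");
    }

    void finalize()
    {
        plain_file.close();
        if constexpr (!only_plain_file)
            marks_file.close();
    }
};

int main()
{
    Stream<false> column_stream("column_a");   /// data + marks, as for columns and skip indices
    Stream<true> stat_stream("a_stat");        /// data only, as for statistics
    column_stream.finalize();
    stat_stream.finalize();
}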
+ template struct Stream { Stream( @@ -61,6 +63,15 @@ public: size_t marks_compress_block_size_, const WriteSettings & query_write_settings); + Stream( + const String & escaped_column_name_, + const MutableDataPartStoragePtr & data_part_storage, + const String & data_path_, + const std::string & data_file_extension_, + const CompressionCodecPtr & compression_codec_, + size_t max_compress_block_size_, + const WriteSettings & query_write_settings); + String escaped_column_name; std::string data_file_extension; std::string marks_file_extension; @@ -73,9 +84,9 @@ public: /// marks_compressed_hashing -> marks_compressor -> marks_hashing -> marks_file std::unique_ptr marks_file; - HashingWriteBuffer marks_hashing; - CompressedWriteBuffer marks_compressor; - HashingWriteBuffer marks_compressed_hashing; + std::conditional_t marks_hashing; + std::conditional_t marks_compressor; + std::conditional_t marks_compressed_hashing; bool compress_marks; bool is_prefinalized = false; @@ -89,13 +100,15 @@ public: void addToChecksums(IMergeTreeDataPart::Checksums & checksums); }; - using StreamPtr = std::unique_ptr; + using StreamPtr = std::unique_ptr>; + using StatisticStreamPtr = std::unique_ptr>; MergeTreeDataPartWriterOnDisk( const MergeTreeMutableDataPartPtr & data_part_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, const String & marks_file_extension, const CompressionCodecPtr & default_codec, const MergeTreeWriterSettings & settings, @@ -117,6 +130,8 @@ protected: /// require additional state: skip_indices_aggregators and skip_index_accumulated_marks void calculateAndSerializeSkipIndices(const Block & skip_indexes_block, const Granules & granules_to_write); + void calculateAndSerializeStatistics(const Block & stats_block); + /// Finishes primary index serialization: write final primary index row (if required) and compute checksums void fillPrimaryIndexChecksums(MergeTreeData::DataPart::Checksums & checksums); void finishPrimaryIndexSerialization(bool sync); @@ -124,6 +139,9 @@ protected: void fillSkipIndicesChecksums(MergeTreeData::DataPart::Checksums & checksums); void finishSkipIndicesSerialization(bool sync); + void fillStatisticsChecksums(MergeTreeData::DataPart::Checksums & checksums); + void finishStatisticsSerialization(bool sync); + /// Get global number of the current which we are writing (or going to start to write) size_t getCurrentMark() const { return current_mark; } @@ -134,6 +152,9 @@ protected: const MergeTreeIndices skip_indices; + const Statistics stats; + std::vector stats_streams; + const String marks_file_extension; const CompressionCodecPtr default_codec; @@ -166,6 +187,7 @@ protected: private: void initSkipIndices(); void initPrimaryIndex(); + void initStatistics(); virtual void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) = 0; }; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index f9fe6f2c8ab..aa97f515074 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -77,12 +77,13 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc_, + const Statistics & stats_to_recalc_, const String & marks_file_extension_, const CompressionCodecPtr & 
default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) : MergeTreeDataPartWriterOnDisk(data_part_, columns_list_, metadata_snapshot_, - indices_to_recalc_, marks_file_extension_, + indices_to_recalc_, stats_to_recalc_, marks_file_extension_, default_codec_, settings_, index_granularity_) { const auto & columns = metadata_snapshot->getColumns(); @@ -116,7 +117,7 @@ void MergeTreeDataPartWriterWide::addStreams( auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr); - column_streams[stream_name] = std::make_unique( + column_streams[stream_name] = std::make_unique>( stream_name, data_part->getDataPartStoragePtr(), stream_name, DATA_FILE_EXTENSION, @@ -256,6 +257,7 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm calculateAndSerializePrimaryIndex(primary_key_block, granules_to_write); calculateAndSerializeSkipIndices(skip_indexes_block, granules_to_write); + calculateAndSerializeStatistics(block); shiftCurrentMark(granules_to_write); } @@ -272,7 +274,7 @@ void MergeTreeDataPartWriterWide::writeSingleMark( void MergeTreeDataPartWriterWide::flushMarkToFile(const StreamNameAndMark & stream_with_mark, size_t rows_in_mark) { - Stream & stream = *column_streams[stream_with_mark.stream_name]; + auto & stream = *column_streams[stream_with_mark.stream_name]; WriteBuffer & marks_out = stream.compress_marks ? stream.marks_compressed_hashing : stream.marks_hashing; writeIntBinary(stream_with_mark.mark.offset_in_compressed_file, marks_out); @@ -296,7 +298,7 @@ StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn( if (is_offsets && offset_columns.contains(stream_name)) return; - Stream & stream = *column_streams[stream_name]; + auto & stream = *column_streams[stream_name]; /// There could already be enough data to compress into the new block. 
if (stream.compressed_hashing.offset() >= settings.min_compress_block_size) @@ -632,6 +634,8 @@ void MergeTreeDataPartWriterWide::fillChecksums(IMergeTreeDataPart::Checksums & fillPrimaryIndexChecksums(checksums); fillSkipIndicesChecksums(checksums); + + fillStatisticsChecksums(checksums); } void MergeTreeDataPartWriterWide::finish(bool sync) @@ -644,6 +648,8 @@ void MergeTreeDataPartWriterWide::finish(bool sync) finishPrimaryIndexSerialization(sync); finishSkipIndicesSerialization(sync); + + finishStatisticsSerialization(sync); } void MergeTreeDataPartWriterWide::writeFinalMark( diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index 633b5119474..574225b9614 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -22,6 +22,7 @@ public: const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, const String & marks_file_extension, const CompressionCodecPtr & default_codec, const MergeTreeWriterSettings & settings, diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 7e306880e9c..da67bff07f3 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -538,6 +538,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( metadata_snapshot, columns, indices, + MergeTreeStatisticFactory::instance().getMany(metadata_snapshot->getStatistics()), compression_codec, context->getCurrentTransaction(), false, @@ -670,6 +671,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( metadata_snapshot, columns, MergeTreeIndices{}, + Statistics{}, /// TODO(hanfei): It should be helpful to write statistics for projection result. compression_codec, NO_TRANSACTION_PTR, false, false, data.getContext()->getWriteSettings()); diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 25a4579c73e..c4046133542 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -20,16 +20,18 @@ namespace DB /// Conditions like "x = N" are considered good if abs(N) > threshold. /// This is used to assume that condition is likely to have good selectivity. 
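The heuristic described in the comment above can be restated as a standalone check. A minimal sketch, assuming a plain C++ toy (the helper name looksSelective and the sample constants are invented; the original check, isConditionGood, is the function being commented out just below in favour of the estimator):

    #include <cmath>
    #include <cstdio>

    // Toy restatement of the rule: `x = N` is presumed selective when abs(N)
    // exceeds a small threshold, because matching a "large" constant usually
    // filters out most rows.
    static bool looksSelective(double n, double threshold = 2.0)
    {
        return std::fabs(n) > threshold;
    }

    int main()
    {
        std::printf("x = 0   -> %d\n", looksSelective(0));    // 0: too common a value
        std::printf("x = 1   -> %d\n", looksSelective(1));    // 0
        std::printf("x = 100 -> %d\n", looksSelective(100));  // 1: likely selective
    }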
-static constexpr auto threshold = 2; +/// static constexpr auto threshold = 2; MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( std::unordered_map column_sizes_, const StorageMetadataPtr & metadata_snapshot, + const ConditionEstimator & estimator_, const Names & queried_columns_, const std::optional & supported_columns_, Poco::Logger * log_) - : table_columns{collections::map( + : estimator(estimator_) + , table_columns{collections::map( metadata_snapshot->getColumns().getAllPhysical(), [](const NameAndTypePair & col) { return col.name; })} , queried_columns{queried_columns_} , supported_columns{supported_columns_} @@ -132,66 +134,66 @@ static void collectColumns(const RPNBuilderTreeNode & node, const NameSet & colu } } -static bool isConditionGood(const RPNBuilderTreeNode & condition, const NameSet & columns_names) -{ - if (!condition.isFunction()) - return false; - - auto function_node = condition.toFunctionNode(); - - /** We are only considering conditions of form `equals(one, another)` or `one = another`, - * especially if either `one` or `another` is ASTIdentifier - */ - if (function_node.getFunctionName() != "equals" || function_node.getArgumentsSize() != 2) - return false; - - auto lhs_argument = function_node.getArgumentAt(0); - auto rhs_argument = function_node.getArgumentAt(1); - - auto lhs_argument_column_name = lhs_argument.getColumnName(); - auto rhs_argument_column_name = rhs_argument.getColumnName(); - - bool lhs_argument_is_column = columns_names.contains(lhs_argument_column_name); - bool rhs_argument_is_column = columns_names.contains(rhs_argument_column_name); - - bool lhs_argument_is_constant = lhs_argument.isConstant(); - bool rhs_argument_is_constant = rhs_argument.isConstant(); - - RPNBuilderTreeNode * constant_node = nullptr; - - if (lhs_argument_is_column && rhs_argument_is_constant) - constant_node = &rhs_argument; - else if (lhs_argument_is_constant && rhs_argument_is_column) - constant_node = &lhs_argument; - else - return false; - - Field output_value; - DataTypePtr output_type; - if (!constant_node->tryGetConstant(output_value, output_type)) - return false; - - const auto type = output_value.getType(); - - /// check the value with respect to threshold - if (type == Field::Types::UInt64) - { - const auto value = output_value.get(); - return value > threshold; - } - else if (type == Field::Types::Int64) - { - const auto value = output_value.get(); - return value < -threshold || threshold < value; - } - else if (type == Field::Types::Float64) - { - const auto value = output_value.get(); - return value < threshold || threshold < value; - } - - return false; -} +/// static bool isConditionGood(const RPNBuilderTreeNode & condition, const NameSet & columns_names) +/// { +/// if (!condition.isFunction()) +/// return false; +/// +/// auto function_node = condition.toFunctionNode(); +/// +/// /** We are only considering conditions of form `equals(one, another)` or `one = another`, +/// * especially if either `one` or `another` is ASTIdentifier +/// */ +/// if (function_node.getFunctionName() != "equals" || function_node.getArgumentsSize() != 2) +/// return false; +/// +/// auto lhs_argument = function_node.getArgumentAt(0); +/// auto rhs_argument = function_node.getArgumentAt(1); +/// +/// auto lhs_argument_column_name = lhs_argument.getColumnName(); +/// auto rhs_argument_column_name = rhs_argument.getColumnName(); +/// +/// bool lhs_argument_is_column = columns_names.contains(lhs_argument_column_name); +/// bool rhs_argument_is_column = 
columns_names.contains(rhs_argument_column_name); +/// +/// bool lhs_argument_is_constant = lhs_argument.isConstant(); +/// bool rhs_argument_is_constant = rhs_argument.isConstant(); +/// +/// RPNBuilderTreeNode * constant_node = nullptr; +/// +/// if (lhs_argument_is_column && rhs_argument_is_constant) +/// constant_node = &rhs_argument; +/// else if (lhs_argument_is_constant && rhs_argument_is_column) +/// constant_node = &lhs_argument; +/// else +/// return false; +/// +/// Field output_value; +/// DataTypePtr output_type; +/// if (!constant_node->tryGetConstant(output_value, output_type)) +/// return false; +/// +/// const auto type = output_value.getType(); +/// +/// /// check the value with respect to threshold +/// if (type == Field::Types::UInt64) +/// { +/// const auto value = output_value.get(); +/// return value > threshold; +/// } +/// else if (type == Field::Types::Int64) +/// { +/// const auto value = output_value.get(); +/// return value < -threshold || threshold < value; +/// } +/// else if (type == Field::Types::Float64) +/// { +/// const auto value = output_value.get(); +/// return value < threshold || threshold < value; +/// } +/// +/// return false; +/// } void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context) const { @@ -229,7 +231,10 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree && cond.table_columns.size() < queried_columns.size(); if (cond.viable) - cond.good = isConditionGood(node, table_columns); + cond.selectivity = estimator.estimateSelectivity(node); + + ///if (cond.viable) + /// cond.good = isConditionGood(node, table_columns); res.emplace_back(std::move(cond)); } diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index 18555a72db1..6985237a7c6 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -6,6 +6,7 @@ #include #include +#include "Storages/Statistic/Statistic.h" #include #include @@ -37,6 +38,7 @@ public: MergeTreeWhereOptimizer( std::unordered_map column_sizes_, const StorageMetadataPtr & metadata_snapshot, + const ConditionEstimator & estimator_, const Names & queried_columns_, const std::optional & supported_columns_, Poco::Logger * log_); @@ -69,12 +71,12 @@ private: /// Can condition be moved to prewhere? bool viable = false; - /// Does the condition presumably have good selectivity? - bool good = false; + /// the lower the better + Float64 selectivity = 0; auto tuple() const { - return std::make_tuple(!viable, !good, columns_size, table_columns.size()); + return std::make_tuple(!viable, selectivity, columns_size, table_columns.size()); } /// Is condition a better candidate for moving to PREWHERE? 
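The switch from the boolean good flag to a Float64 selectivity changes how PREWHERE candidates are ranked: conditions are sorted by the tuple shown above, so a lower estimated selectivity now moves a condition earlier in the chain. A minimal standalone sketch of that lexicographic ordering (SimpleCondition and the sample numbers are invented for illustration and are not part of the patch):

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>
    #include <tuple>
    #include <vector>

    // Simplified stand-in for MergeTreeWhereOptimizer::Condition.
    struct SimpleCondition
    {
        const char * text;
        bool viable;
        double selectivity;   // the lower the better
        size_t columns_size;

        auto tuple() const { return std::make_tuple(!viable, selectivity, columns_size); }
    };

    int main()
    {
        std::vector<SimpleCondition> conditions = {
            {"x = 42", true, 0.05, 8},
            {"y < 100", true, 0.50, 8},
            {"f(z) = 1", false, 0.01, 4},   // not viable: cannot be moved at all
        };

        // Same idea as sorting by Condition::tuple(): non-viable conditions last,
        // then lowest selectivity first.
        std::sort(conditions.begin(), conditions.end(),
                  [](const SimpleCondition & a, const SimpleCondition & b) { return a.tuple() < b.tuple(); });

        for (const auto & c : conditions)
            std::printf("%s (selectivity %.2f)\n", c.text, c.selectivity);
    }

With the sample numbers, "x = 42" is moved to PREWHERE first; the non-viable condition sorts last regardless of its estimate because the tuple starts with !viable.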
@@ -137,6 +139,8 @@ private: static NameSet determineArrayJoinedNames(const ASTSelectQuery & select); + const ConditionEstimator estimator; + const NameSet table_columns; const Names queried_columns; const std::optional supported_columns; diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index 39c4157a42e..a3e0f6bf77b 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -198,7 +198,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore( part, metadata_snapshot, block.getNamesAndTypesList(), - {}, + {}, {}, CompressionCodecFactory::instance().get("NONE", {}), NO_TRANSACTION_PTR); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index bfd9e92b4eb..f4bff1cd42c 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -19,6 +19,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list_, const MergeTreeIndices & skip_indices, + const Statistics & statistics, CompressionCodecPtr default_codec_, const MergeTreeTransactionPtr & txn, bool reset_columns_, @@ -47,7 +48,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( data_part->version.setCreationTID(tid, nullptr); data_part->storeVersionMetadata(); - writer = data_part->getWriter(columns_list, metadata_snapshot, skip_indices, default_codec, writer_settings, {}); + writer = data_part->getWriter(columns_list, metadata_snapshot, skip_indices, statistics, default_codec, writer_settings, {}); } /// If data is pre-sorted. diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index 20e6de5a99b..48eca3e71f6 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -19,6 +20,7 @@ public: const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list_, const MergeTreeIndices & skip_indices, + const Statistics & statistics, CompressionCodecPtr default_codec_, const MergeTreeTransactionPtr & txn, bool reset_columns_ = false, diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 3b2eb96f2d4..492a573a738 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -16,6 +16,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( const Block & header_, CompressionCodecPtr default_codec, const MergeTreeIndices & indices_to_recalc, + const Statistics & stats_to_recalc_, WrittenOffsetColumns * offset_columns_, const MergeTreeIndexGranularity & index_granularity, const MergeTreeIndexGranularityInfo * index_granularity_info) @@ -36,6 +37,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( header.getNamesAndTypesList(), metadata_snapshot_, indices_to_recalc, + stats_to_recalc_, default_codec, writer_settings, index_granularity); diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h index f382b0fef60..1a2c56a4f7b 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h @@ -1,6 +1,7 @@ 
#pragma once #include +#include namespace DB { @@ -19,6 +20,7 @@ public: const Block & header_, CompressionCodecPtr default_codec_, const MergeTreeIndices & indices_to_recalc_, + const Statistics & stats_to_recalc_, WrittenOffsetColumns * offset_columns_ = nullptr, const MergeTreeIndexGranularity & index_granularity = {}, const MergeTreeIndexGranularityInfo * index_granularity_info_ = nullptr); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index b98b0844ee7..d16fde50f0a 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -71,6 +72,7 @@ static void splitAndModifyMutationCommands( for (const auto & command : commands) { if (command.type == MutationCommand::Type::MATERIALIZE_INDEX + || command.type == MutationCommand::Type::MATERIALIZE_STATISTIC || command.type == MutationCommand::Type::MATERIALIZE_COLUMN || command.type == MutationCommand::Type::MATERIALIZE_PROJECTION || command.type == MutationCommand::Type::MATERIALIZE_TTL @@ -190,6 +192,7 @@ static void splitAndModifyMutationCommands( { if (command.type == MutationCommand::Type::MATERIALIZE_INDEX || command.type == MutationCommand::Type::MATERIALIZE_COLUMN + || command.type == MutationCommand::Type::MATERIALIZE_STATISTIC || command.type == MutationCommand::Type::MATERIALIZE_PROJECTION || command.type == MutationCommand::Type::MATERIALIZE_TTL || command.type == MutationCommand::Type::DELETE @@ -437,6 +440,20 @@ static ExecuteTTLType shouldExecuteTTL(const StorageMetadataPtr & metadata_snaps return has_ttl_expression ? ExecuteTTLType::RECALCULATE : ExecuteTTLType::NONE; } +static std::set getStatisticsToRecalculate(const StorageMetadataPtr & metadata_snapshot, const NameSet & materialized_stats) +{ + const auto & stats_factory = MergeTreeStatisticFactory::instance(); + std::set stats_to_recalc; + const auto & stats = metadata_snapshot->getStatistics(); + for (const auto & stat_desc : stats) + { + if (materialized_stats.contains(stat_desc.name)) + { + stats_to_recalc.insert(stats_factory.get(stat_desc)); + } + } + return stats_to_recalc; +} /// Return set of indices which should be recalculated during mutation also /// wraps input stream into additional expression stream @@ -527,7 +544,8 @@ static NameSet collectFilesToSkip( const Block & updated_header, const std::set & indices_to_recalc, const String & mrk_extension, - const std::set & projections_to_recalc) + const std::set & projections_to_recalc, + const std::set & stats_to_recalc) { NameSet files_to_skip = source_part->getFileNamesWithoutChecksums(); @@ -544,6 +562,9 @@ static NameSet collectFilesToSkip( for (const auto & projection : projections_to_recalc) files_to_skip.insert(projection->getDirectoryName()); + for (const auto & stat : stats_to_recalc) + files_to_skip.insert(stat->getFileName() + STAT_FILE_SUFFIX); + if (isWidePart(source_part)) { auto new_stream_counts = getStreamCounts(new_part, new_part->getColumns().getNames()); @@ -620,6 +641,11 @@ static NameToNameVector collectFilesForRenames( if (source_part->checksums.has(command.column_name + ".proj")) add_rename(command.column_name + ".proj", ""); } + //else if (command.type == MutationCommand::Type::DROP_STATISTICS) + //{ + // if (source_part->checksums.has(command.column_name + ".stat")) + // add_rename(command.column_name + ".stat", ""); + //} else if (isWidePart(source_part)) { if (command.type == MutationCommand::Type::DROP_COLUMN) @@ -830,6 +856,7 @@ 
struct MutationContext NamesAndTypesList storage_columns; NameSet materialized_indices; NameSet materialized_projections; + NameSet materialized_statistics; MergeTreeData::MutableDataPartPtr new_data_part; IMergedBlockOutputStreamPtr out{nullptr}; @@ -840,6 +867,7 @@ struct MutationContext IMergeTreeDataPart::MinMaxIndexPtr minmax_idx{nullptr}; std::set indices_to_recalc; + std::set stats_to_recalc; std::set projections_to_recalc; MergeTreeData::DataPart::Checksums existing_indices_checksums; NameSet files_to_skip; @@ -1292,6 +1320,30 @@ private: } } + Statistics stats; + const auto & statistics = ctx->metadata_snapshot->getStatistics(); + for (const auto & stat : statistics) + { + if (ctx->materialized_statistics.contains(stat.name)) + { + stats.push_back(MergeTreeStatisticFactory::instance().get(stat)); + } + else + { + auto prefix = fmt::format("{}{}.", STAT_FILE_PREFIX, stat.name); + auto it = ctx->source_part->checksums.files.upper_bound(prefix); + while (it != ctx->source_part->checksums.files.end()) + { + if (!startsWith(it->first, prefix)) + break; + + entries_to_hardlink.insert(it->first); + ctx->existing_indices_checksums.addFile(it->first, it->second.file_size, it->second.file_hash); + ++it; + } + } + } + NameSet removed_projections; for (const auto & command : ctx->for_file_renames) { @@ -1376,11 +1428,13 @@ private: ctx->minmax_idx = std::make_shared(); + LOG_TRACE(ctx->log, "going to write {} stats", stats.size()); ctx->out = std::make_shared( ctx->new_data_part, ctx->metadata_snapshot, ctx->new_data_part->getColumns(), skip_indices, + stats, ctx->compression_codec, ctx->txn, /*reset_columns=*/ true, @@ -1575,6 +1629,7 @@ private: ctx->updated_header, ctx->compression_codec, std::vector(ctx->indices_to_recalc.begin(), ctx->indices_to_recalc.end()), + Statistics(ctx->stats_to_recalc.begin(), ctx->stats_to_recalc.end()), nullptr, ctx->source_part->index_granularity, &ctx->source_part->index_granularity_info @@ -1840,6 +1895,8 @@ bool MutateTask::prepare() ctx->metadata_snapshot->getColumns().getNamesOfPhysical(), context_for_reading, settings); ctx->materialized_indices = ctx->interpreter->grabMaterializedIndices(); + ctx->materialized_statistics = ctx->interpreter->grabMaterializedStatistics(); + LOG_INFO(ctx->log, "stats number {}", ctx->materialized_statistics.size()); ctx->materialized_projections = ctx->interpreter->grabMaterializedProjections(); ctx->mutating_pipeline_builder = ctx->interpreter->execute(); ctx->updated_header = ctx->interpreter->getUpdatedHeader(); @@ -1904,13 +1961,16 @@ bool MutateTask::prepare() ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate(ctx->metadata_snapshot, ctx->materialized_projections); + ctx->stats_to_recalc = MutationHelpers::getStatisticsToRecalculate(ctx->metadata_snapshot, ctx->materialized_statistics); + ctx->files_to_skip = MutationHelpers::collectFilesToSkip( ctx->source_part, ctx->new_data_part, ctx->updated_header, ctx->indices_to_recalc, ctx->mrk_extension, - ctx->projections_to_recalc); + ctx->projections_to_recalc, + ctx->stats_to_recalc); ctx->files_to_rename = MutationHelpers::collectFilesForRenames( ctx->source_part, diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 75f1542e30e..b93604bcac6 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -579,6 +579,11 @@ static StoragePtr create(const StorageFactory::Arguments & args) for (auto & index : 
args.query.columns_list->indices->children) metadata.secondary_indices.push_back(IndexDescription::getIndexFromAST(index, columns, context)); + if (args.query.columns_list && args.query.columns_list->stats) + for (const auto & stat : args.query.columns_list->stats->children) + metadata.statistics.push_back( + StatisticDescription::getStatisticFromAST(stat, columns, args.getContext())); + if (args.query.columns_list && args.query.columns_list->projections) for (auto & projection_ast : args.query.columns_list->projections->children) { diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index 6eb345b449e..b00dca95c56 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -10,6 +10,7 @@ #include #include #include +#include "Parsers/ASTAlterQuery.h" #include #include @@ -68,6 +69,16 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, res.index_name = command->index->as().name(); return res; } + else if (command->type == ASTAlterCommand::MATERIALIZE_STATISTIC) + { + MutationCommand res; + res.ast = command->ptr(); + res.type = MATERIALIZE_STATISTIC; + res.partition = command->partition; + res.predicate = nullptr; + res.statistic_name = command->statistic->as().name(); + return res; + } else if (command->type == ASTAlterCommand::MATERIALIZE_PROJECTION) { MutationCommand res; diff --git a/src/Storages/MutationCommands.h b/src/Storages/MutationCommands.h index 5ef0cfda1be..c9fa59bc309 100644 --- a/src/Storages/MutationCommands.h +++ b/src/Storages/MutationCommands.h @@ -30,10 +30,12 @@ struct MutationCommand UPDATE, MATERIALIZE_INDEX, MATERIALIZE_PROJECTION, + MATERIALIZE_STATISTIC, READ_COLUMN, /// Read column and apply conversions (MODIFY COLUMN alter query). DROP_COLUMN, DROP_INDEX, DROP_PROJECTION, + DROP_STATISTIC, MATERIALIZE_TTL, RENAME_COLUMN, MATERIALIZE_COLUMN, @@ -48,9 +50,10 @@ struct MutationCommand /// Columns with corresponding actions std::unordered_map column_to_update_expression; - /// For MATERIALIZE INDEX and PROJECTION + /// For MATERIALIZE INDEX and PROJECTION and STATISTIC String index_name; String projection_name; + String statistic_name; /// For MATERIALIZE INDEX, UPDATE and DELETE. 
ASTPtr partition; diff --git a/src/Storages/Statistic/Statistic.cpp b/src/Storages/Statistic/Statistic.cpp new file mode 100644 index 00000000000..eaf7d828e1f --- /dev/null +++ b/src/Storages/Statistic/Statistic.cpp @@ -0,0 +1,155 @@ +#include +#include +#include +#include +#include "Storages/MergeTree/RPNBuilder.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INCORRECT_QUERY; +} + + +std::optional ConditionEstimator::extractSingleColumn(const RPNBuilderTreeNode & node) const +{ + if (node.isConstant()) + { + return std::nullopt; + } + + if (!node.isFunction()) + { + auto column_name = node.getColumnName(); + return {column_name}; + } + + auto function_node = node.toFunctionNode(); + size_t arguments_size = function_node.getArgumentsSize(); + std::optional result; + for (size_t i = 0; i < arguments_size; ++i) + { + auto function_argument = function_node.getArgumentAt(i); + auto subresult = extractSingleColumn(function_argument); + if (subresult == std::nullopt) + continue; + else if (subresult == "") + return ""; + else if (result == std::nullopt) + result = subresult; + else if (result.value() != subresult.value()) + return ""; + } + return result; +} + +std::pair ConditionEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const std::string & column_name) const +{ + if (!node.isFunction()) + return {}; + + auto function_node = node.toFunctionNode(); + if (function_node.getArgumentsSize() != 2) + return {}; + + std::string function_name = function_node.getFunctionName(); + + auto lhs_argument = function_node.getArgumentAt(0); + auto rhs_argument = function_node.getArgumentAt(1); + + auto lhs_argument_column_name = lhs_argument.getColumnName(); + auto rhs_argument_column_name = rhs_argument.getColumnName(); + + bool lhs_argument_is_column = column_name == (lhs_argument_column_name); + bool rhs_argument_is_column = column_name == (rhs_argument_column_name); + + bool lhs_argument_is_constant = lhs_argument.isConstant(); + bool rhs_argument_is_constant = rhs_argument.isConstant(); + + RPNBuilderTreeNode * constant_node = nullptr; + + if (lhs_argument_is_column && rhs_argument_is_constant) + constant_node = &rhs_argument; + else if (lhs_argument_is_constant && rhs_argument_is_column) + constant_node = &lhs_argument; + else + return {}; + + Field output_value; + DataTypePtr output_type; + if (!constant_node->tryGetConstant(output_value, output_type)) + return {}; + + const auto type = output_value.getType(); + Float64 value; + if (type == Field::Types::Int64) + value = output_value.get(); + else if (type == Field::Types::UInt64) + value = output_value.get(); + else if (type == Field::Types::Float64) + value = output_value.get(); + return std::make_pair(function_name, value); +} + +StatisticPtr TDigestCreator(const StatisticDescription & stat) +{ + if (stat.column_names.size() != 1) + { + /// throw + } + + /// TODO: check column data types. 
+ return StatisticPtr(new TDigestStatistic(stat)); +} + +void MergeTreeStatisticFactory::registerCreator(const std::string & stat_type, Creator creator) +{ + if (!creators.emplace(stat_type, std::move(creator)).second) + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticFactory: the statistic creator type {} is not unique", stat_type); +} + +MergeTreeStatisticFactory::MergeTreeStatisticFactory() +{ + registerCreator("t_digest", TDigestCreator); + + ///registerCreator("cm_sketch", CMSketchCreator); +} + +MergeTreeStatisticFactory & MergeTreeStatisticFactory::instance() +{ + static MergeTreeStatisticFactory instance; + return instance; +} + +StatisticPtr MergeTreeStatisticFactory::get(const StatisticDescription & stat) const +{ + auto it = creators.find(stat.type); + if (it == creators.end()) + { + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Unknown Statistic type '{}'. Available types: {}", stat.type, + std::accumulate(creators.cbegin(), creators.cend(), std::string{}, + [] (auto && left, const auto & right) -> std::string + { + if (left.empty()) + return right.first; + else + return left + ", " + right.first; + }) + ); + } + return std::make_shared(stat); +} + +Statistics MergeTreeStatisticFactory::getMany(const std::vector & stats) const +{ + Statistics result; + for (const auto & stat : stats) + result.push_back(get(stat)); + return result; +} + +} diff --git a/src/Storages/Statistic/Statistic.h b/src/Storages/Statistic/Statistic.h new file mode 100644 index 00000000000..2d16ef90ebe --- /dev/null +++ b/src/Storages/Statistic/Statistic.h @@ -0,0 +1,262 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "Common/Exception.h" +#include +#include "Storages/MergeTree/RPNBuilder.h" + +#include + +/// this is for user-defined statistic. +/// For auto collected statisic, we can use 'auto_statistic_' +constexpr auto STAT_FILE_PREFIX = "statistic_"; +constexpr auto STAT_FILE_SUFFIX = ".stat"; + +namespace DB +{ + +class IStatistic; +using StatisticPtr = std::shared_ptr; +using Statistics = std::vector; + +class IStatistic +{ +public: + explicit IStatistic(const StatisticDescription & stat_) + : statistics(stat_) + { + } + virtual ~IStatistic() = default; + + String getFileName() const + { + return STAT_FILE_PREFIX + name(); + } + + const String & name() const + { + return statistics.name; + } + + const String & columnName() const + { + return statistics.column_names[0]; + } + /// const String& type() const = 0; + /// virtual StatisticType statisticType() const = 0; + + virtual void serialize(WriteBuffer & buf) = 0; + virtual void deserialize(ReadBuffer & buf) = 0; + virtual void update(const Block & block) = 0; + virtual UInt64 count() = 0; + +protected: + + const StatisticDescription & statistics; + +}; + +class TDigestStatistic : public IStatistic +{ + QuantileTDigest data; +public: + explicit TDigestStatistic(const StatisticDescription & stat) : IStatistic(stat) + { + } + + struct Range + { + Float64 left, right; + }; + + /// FIXME: implement correct count estimate method. 
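For the estimation idea behind the estimateLess method that follows: the statistic answers "how many rows have a value below X", and the optimizer later divides that count by the total row count to obtain a selectivity. A minimal sketch with an exact sorted sample standing in for QuantileTDigest (ExactStatistic and the sample data are invented; this is not the ClickHouse implementation):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Toy column "statistic" that remembers every value exactly. A t-digest
    // keeps a small summary instead, but answers the same question approximately.
    struct ExactStatistic
    {
        std::vector<double> values;

        void update(double v) { values.push_back(v); }

        double estimateLess(double x) const
        {
            auto sorted = values;       // copy-and-sort is fine for a toy
            std::sort(sorted.begin(), sorted.end());
            return static_cast<double>(std::lower_bound(sorted.begin(), sorted.end(), x) - sorted.begin());
        }
    };

    int main()
    {
        ExactStatistic stat;
        for (int i = 0; i < 1000; ++i)
            stat.update(i);

        // Selectivity of "col < 100": rows expected to pass / total rows.
        double selectivity = stat.estimateLess(100) / 1000.0;
        std::printf("estimated selectivity: %.2f\n", selectivity);   // ~0.10
    }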
+ Float64 estimateLess(Float64 val) const + { + return data.getCountLessThan(val); + } + + void serialize(WriteBuffer & buf) override + { + data.serialize(buf); + LOG_DEBUG(&Poco::Logger::get("t-digest"), "serialize into {} data", buf.offset()); + } + + void deserialize(ReadBuffer & buf) override + { + data.deserialize(buf); + } + + void update(const Block & block) override + { + const auto & column_with_type = block.getByName(statistics.column_names[0]); + size_t size = block.rows(); + + for (size_t i = 0; i < size; ++i) + { + /// TODO: support more types. + Float64 value = column_with_type.column->getFloat64(i); + data.add(value, 1); + } + + LOG_DEBUG(&Poco::Logger::get("t-digest"), "write into {} data", size); + } + + UInt64 count() override + { + return static_cast(data.count); + } +}; + +class MergeTreeStatisticFactory : private boost::noncopyable +{ +public: + static MergeTreeStatisticFactory & instance(); + + using Creator = std::function; + + StatisticPtr get(const StatisticDescription & stat) const; + + Statistics getMany(const std::vector & stats) const; + + void registerCreator(const std::string & type, Creator creator); + +protected: + MergeTreeStatisticFactory(); + +private: + using Creators = std::unordered_map; + Creators creators; +}; + +class ConditionEstimator +{ +private: + + static constexpr auto default_good_cond_factor = 0.1; + static constexpr auto default_normal_cond_factor = 0.5; + static constexpr auto default_unknown_cond_factor = 1.0; + /// Conditions like "x = N" are considered good if abs(N) > threshold. + /// This is used to assume that condition is likely to have good selectivity. + static constexpr auto threshold = 2; + + UInt64 total_count; + + struct PartColumnEstimator + { + UInt64 part_count; + + std::shared_ptr t_digest; + + void merge(StatisticPtr statistic) + { + UInt64 cur_part_count = statistic->count(); + if (part_count == 0) + part_count = cur_part_count; + + if (typeid_cast(statistic.get())) + { + t_digest = std::static_pointer_cast(statistic); + } + } + + Float64 estimateLess(Float64 val) const + { + if (t_digest != nullptr) + return t_digest -> estimateLess(val); + return part_count * default_normal_cond_factor; + } + + Float64 estimateGreator(Float64 val) const + { + if (t_digest != nullptr) + return part_count - t_digest -> estimateLess(val); + return part_count * default_normal_cond_factor; + } + }; + + struct ColumnEstimator + { + std::map estimators; + + void merge(std::string part_name, StatisticPtr statistic) + { + estimators[part_name].merge(statistic); + } + Float64 estimateLess(Float64 val) const + { + if (estimators.empty()) + return default_normal_cond_factor; + Float64 result = 0; + for (const auto & [key, estimator] : estimators) + result += estimator.estimateLess(val); + return result; + } + + Float64 estimateGreater(Float64 val) const + { + if (estimators.empty()) + return default_normal_cond_factor; + Float64 result = 0; + for (const auto & [key, estimator] : estimators) + result += estimator.estimateGreator(val); + return result; + } + }; + + std::map column_estimators; + std::optional extractSingleColumn(const RPNBuilderTreeNode & node) const; + std::pair extractBinaryOp(const RPNBuilderTreeNode & node, const std::string & column_name) const; + +public: + + ConditionEstimator() = default; + + /// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ... 
+ /// Right now we only support simple condition like col = val / col < val + Float64 estimateSelectivity(const RPNBuilderTreeNode & node) const + { + auto col = extractSingleColumn(node); + if (col == std::nullopt || col == "") + { + return default_unknown_cond_factor; + } + auto it = column_estimators.find(col.value()); + ColumnEstimator estimator; + if (it != column_estimators.end()) + { + estimator = it->second; + } + auto [op, val] = extractBinaryOp(node, col.value()); + if (op == "equals") + { + if (val < - threshold || val > threshold) + return default_normal_cond_factor; + else + return default_good_cond_factor; + } + else if (op == "less" || op == "lessThan") + { + return estimator.estimateLess(val) / total_count; + } + else if (op == "greater" || op == "greaterThan") + { + return estimator.estimateLess(val) / total_count; + } + else + return default_unknown_cond_factor; + } + void merge(std::string part_name, StatisticPtr statistic) + { + column_estimators[statistic->columnName()].merge(part_name, statistic); + } + +}; + + +} diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp new file mode 100644 index 00000000000..6c7e1244fcd --- /dev/null +++ b/src/Storages/StatisticsDescription.cpp @@ -0,0 +1,120 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_QUERY; + extern const int LOGICAL_ERROR; +}; + +StatisticDescription StatisticDescription::getStatisticFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context) +{ + const auto * stat_definition = definition_ast->as(); + if (!stat_definition) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create statistic from non ASTStatisticDeclaration AST"); + + if (stat_definition->name.empty()) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Statistic must have name in definition."); + + // type == nullptr => auto + if (!stat_definition->type) + throw Exception(ErrorCodes::INCORRECT_QUERY, "TYPE is required for statistics"); + + if (stat_definition->type->parameters && !stat_definition->type->parameters->children.empty()) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Statistics type cannot have parameters"); + + StatisticDescription stat; + stat.definition_ast = definition_ast->clone(); + stat.name = stat_definition->name; + stat.type = Poco::toLower(stat_definition->type->name); + + ASTPtr expr_list = extractKeyExpressionList(stat_definition->columns->clone()); + for (const auto & ast : expr_list->children) + { + ASTIdentifier* ident = ast->as(); + if (!ident || !columns.hasPhysical(ident->getColumnName())) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column"); + const auto & column = columns.get(ident->getColumnName()); + stat.column_names.push_back(column.name); + stat.data_types.push_back(column.type); + } + + UNUSED(context); + + return stat; +} + +StatisticDescription::StatisticDescription(const StatisticDescription & other) + : definition_ast(other.definition_ast ? 
other.definition_ast->clone() : nullptr) + , name(other.name) + , type(other.type) + , column_names(other.column_names) +{ +} + +StatisticDescription & StatisticDescription::operator=(const StatisticDescription & other) +{ + if (&other == this) + return *this; + + if (other.definition_ast) + definition_ast = other.definition_ast->clone(); + else + definition_ast.reset(); + + name = other.name; + type = other.type; + column_names = other.column_names; + + return *this; +} + + +bool StatisticsDescriptions::has(const String & name) const +{ + for (const auto & statistic : *this) + if (statistic.name == name) + return true; + return false; +} + +String StatisticsDescriptions::toString() const +{ + if (empty()) + return {}; + + ASTExpressionList list; + for (const auto & statistic : *this) + list.children.push_back(statistic.definition_ast); + + return serializeAST(list, true); +} + +StatisticsDescriptions StatisticsDescriptions::parse(const String & str, const ColumnsDescription & columns, ContextPtr context) +{ + StatisticsDescriptions result; + if (str.empty()) + return result; + + ParserStatisticDeclaration parser; + ASTPtr list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + + for (const auto & index : list->children) + result.emplace_back(StatisticDescription::getStatisticFromAST(index, columns, context)); + + return result; +} + +} diff --git a/src/Storages/StatisticsDescription.h b/src/Storages/StatisticsDescription.h new file mode 100644 index 00000000000..2cbce381990 --- /dev/null +++ b/src/Storages/StatisticsDescription.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +struct StatisticDescription +{ + /// Definition AST of statistic + ASTPtr definition_ast; + + /// Statistic name + String name; + + String type; + + /// Names of statistic columns + Names column_names; + + /// Data types of statistic columns + DataTypes data_types; + + static StatisticDescription getStatisticFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context); + + StatisticDescription() = default; + + /// We need custom copy constructors because we don't want + /// unintentionaly share AST variables and modify them. 
+ StatisticDescription(const StatisticDescription & other); + StatisticDescription & operator=(const StatisticDescription & other); +}; + +struct StatisticsDescriptions : public std::vector +{ + /// Stat with name exists + bool has(const String & name) const; + /// Convert description to string + String toString() const; + /// Parse description from string + static StatisticsDescriptions parse(const String & str, const ColumnsDescription & columns, ContextPtr context); +}; + +} diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index afe75349864..4546d9a8bda 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -4,6 +4,7 @@ #include #include #include +#include "Storages/StatisticsDescription.h" #include #include #include @@ -28,6 +29,7 @@ namespace ErrorCodes StorageInMemoryMetadata::StorageInMemoryMetadata(const StorageInMemoryMetadata & other) : columns(other.columns) , secondary_indices(other.secondary_indices) + , statistics(other.statistics) , constraints(other.constraints) , projections(other.projections.clone()) , minmax_count_projection( @@ -52,6 +54,7 @@ StorageInMemoryMetadata & StorageInMemoryMetadata::operator=(const StorageInMemo columns = other.columns; secondary_indices = other.secondary_indices; + statistics = other.statistics; constraints = other.constraints; projections = other.projections.clone(); if (other.minmax_count_projection) @@ -91,6 +94,11 @@ void StorageInMemoryMetadata::setSecondaryIndices(IndicesDescription secondary_i secondary_indices = std::move(secondary_indices_); } +void StorageInMemoryMetadata::setStatistics(StatisticsDescriptions statistics_) +{ + statistics = std::move(statistics_); +} + void StorageInMemoryMetadata::setConstraints(ConstraintsDescription constraints_) { constraints = std::move(constraints_); @@ -146,6 +154,11 @@ const IndicesDescription & StorageInMemoryMetadata::getSecondaryIndices() const return secondary_indices; } +const StatisticsDescriptions & StorageInMemoryMetadata::getStatistics() const +{ + return statistics; +} + bool StorageInMemoryMetadata::hasSecondaryIndices() const { return !secondary_indices.empty(); diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 4ed7eb8bf29..761788949fb 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +25,8 @@ struct StorageInMemoryMetadata ColumnsDescription columns; /// Table indices. Currently supported for MergeTree only. IndicesDescription secondary_indices; + + StatisticsDescriptions statistics; /// Table constraints. Currently supported for MergeTree only. ConstraintsDescription constraints; /// Table projections. Currently supported for MergeTree only. 
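The copy constructor and assignment operator added to StatisticDescription above clone definition_ast instead of sharing it, so that later rewrites of the AST in one copy cannot leak into another. A minimal sketch of the same ownership pattern, with invented Node and Description types (not ClickHouse code):

    #include <memory>
    #include <string>

    // Hypothetical AST node with an explicit clone(), mirroring the idea of IAST::clone().
    struct Node
    {
        std::string text;
        std::shared_ptr<Node> clone() const { return std::make_shared<Node>(*this); }
    };

    struct Description
    {
        std::shared_ptr<Node> definition_ast;
        std::string name;

        Description() = default;

        // Deep-copy the AST so two descriptions never share (and mutate) the same node.
        Description(const Description & other)
            : definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr)
            , name(other.name)
        {
        }

        Description & operator=(const Description & other)
        {
            if (this == &other)
                return *this;
            definition_ast = other.definition_ast ? other.definition_ast->clone() : nullptr;
            name = other.name;
            return *this;
        }
    };

    int main()
    {
        Description a;
        a.definition_ast = std::make_shared<Node>(Node{"TYPE t_digest"});
        a.name = "stat_a";

        Description b = a;                       // deep copy
        b.definition_ast->text = "TYPE cm_sketch";

        // `a` keeps its own AST because the node was cloned, not shared.
        return a.definition_ast->text == "TYPE t_digest" ? 0 : 1;
    }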
@@ -75,6 +78,9 @@ struct StorageInMemoryMetadata /// Sets secondary indices void setSecondaryIndices(IndicesDescription secondary_indices_); + /// Sets statistics + void setStatistics(StatisticsDescriptions statistics_); + /// Sets constraints void setConstraints(ConstraintsDescription constraints_); @@ -105,6 +111,8 @@ struct StorageInMemoryMetadata /// Returns secondary indices const IndicesDescription & getSecondaryIndices() const; + const StatisticsDescriptions & getStatistics() const; + /// Has at least one non primary index bool hasSecondaryIndices() const; From 0fb68f41c2afb774524896749a6092da7884339e Mon Sep 17 00:00:00 2001 From: Han Fei Date: Fri, 11 Aug 2023 16:45:06 +0200 Subject: [PATCH 0025/1097] fix tests --- src/Core/Settings.h | 2 + .../MergeTree/MergeTreeWhereOptimizer.cpp | 137 ++++++++++-------- .../MergeTree/MergeTreeWhereOptimizer.h | 8 +- .../01271_show_privileges.reference | 4 + .../02117_show_create_table_system.reference | 6 +- 5 files changed, 91 insertions(+), 66 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 6c3d339b4be..0936d092f99 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -132,6 +132,8 @@ class IColumn; M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \ M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \ \ + M(Bool, allow_statistic_optimize, false, "use statistic to optimize queries", 0) \ + \ M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \ M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \ \ diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index d19e9c410b2..6c331a22b1d 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -10,6 +10,7 @@ #include #include #include +#include "Common/logger_useful.h" #include #include #include @@ -20,7 +21,7 @@ namespace DB /// Conditions like "x = N" are considered good if abs(N) > threshold. /// This is used to assume that condition is likely to have good selectivity. 
-/// static constexpr auto threshold = 2; +static constexpr auto threshold = 2; static NameToIndexMap fillNamesPositions(const Names & names) { @@ -74,6 +75,8 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( if (it != column_sizes.end()) total_size_of_queried_columns += it->second; } + + LOG_DEBUG(log, "pk columns size : {}", metadata_snapshot->getPrimaryKey().column_names.size()); } void MergeTreeWhereOptimizer::optimize(SelectQueryInfo & select_query_info, const ContextPtr & context) const @@ -92,6 +95,7 @@ void MergeTreeWhereOptimizer::optimize(SelectQueryInfo & select_query_info, cons where_optimizer_context.move_all_conditions_to_prewhere = context->getSettingsRef().move_all_conditions_to_prewhere; where_optimizer_context.move_primary_key_columns_to_end_of_prewhere = context->getSettingsRef().move_primary_key_columns_to_end_of_prewhere; where_optimizer_context.is_final = select.final(); + where_optimizer_context.use_statistic = context->getSettingsRef().allow_statistic_optimize; RPNBuilderTreeContext tree_context(context, std::move(block_with_constants), {} /*prepared_sets*/); RPNBuilderTreeNode node(select.where().get(), tree_context); @@ -122,6 +126,7 @@ std::optional MergeTreeWhe where_optimizer_context.move_all_conditions_to_prewhere = context->getSettingsRef().move_all_conditions_to_prewhere; where_optimizer_context.move_primary_key_columns_to_end_of_prewhere = context->getSettingsRef().move_primary_key_columns_to_end_of_prewhere; where_optimizer_context.is_final = is_final; + where_optimizer_context.use_statistic = context->getSettingsRef().allow_statistic_optimize; RPNBuilderTreeContext tree_context(context); RPNBuilderTreeNode node(&filter_dag->findInOutputs(filter_column_name), tree_context); @@ -167,66 +172,66 @@ static void collectColumns(const RPNBuilderTreeNode & node, const NameSet & colu } } -/// static bool isConditionGood(const RPNBuilderTreeNode & condition, const NameSet & columns_names) -/// { -/// if (!condition.isFunction()) -/// return false; -/// -/// auto function_node = condition.toFunctionNode(); -/// -/// /** We are only considering conditions of form `equals(one, another)` or `one = another`, -/// * especially if either `one` or `another` is ASTIdentifier -/// */ -/// if (function_node.getFunctionName() != "equals" || function_node.getArgumentsSize() != 2) -/// return false; -/// -/// auto lhs_argument = function_node.getArgumentAt(0); -/// auto rhs_argument = function_node.getArgumentAt(1); -/// -/// auto lhs_argument_column_name = lhs_argument.getColumnName(); -/// auto rhs_argument_column_name = rhs_argument.getColumnName(); -/// -/// bool lhs_argument_is_column = columns_names.contains(lhs_argument_column_name); -/// bool rhs_argument_is_column = columns_names.contains(rhs_argument_column_name); -/// -/// bool lhs_argument_is_constant = lhs_argument.isConstant(); -/// bool rhs_argument_is_constant = rhs_argument.isConstant(); -/// -/// RPNBuilderTreeNode * constant_node = nullptr; -/// -/// if (lhs_argument_is_column && rhs_argument_is_constant) -/// constant_node = &rhs_argument; -/// else if (lhs_argument_is_constant && rhs_argument_is_column) -/// constant_node = &lhs_argument; -/// else -/// return false; -/// -/// Field output_value; -/// DataTypePtr output_type; -/// if (!constant_node->tryGetConstant(output_value, output_type)) -/// return false; -/// -/// const auto type = output_value.getType(); -/// -/// /// check the value with respect to threshold -/// if (type == Field::Types::UInt64) -/// { -/// const auto value = output_value.get(); 
-/// return value > threshold; -/// } -/// else if (type == Field::Types::Int64) -/// { -/// const auto value = output_value.get(); -/// return value < -threshold || threshold < value; -/// } -/// else if (type == Field::Types::Float64) -/// { -/// const auto value = output_value.get(); -/// return value < threshold || threshold < value; -/// } -/// -/// return false; -/// } +static bool isConditionGood(const RPNBuilderTreeNode & condition, const NameSet & columns_names) +{ + if (!condition.isFunction()) + return false; + + auto function_node = condition.toFunctionNode(); + + /** We are only considering conditions of form `equals(one, another)` or `one = another`, + * especially if either `one` or `another` is ASTIdentifier + */ + if (function_node.getFunctionName() != "equals" || function_node.getArgumentsSize() != 2) + return false; + + auto lhs_argument = function_node.getArgumentAt(0); + auto rhs_argument = function_node.getArgumentAt(1); + + auto lhs_argument_column_name = lhs_argument.getColumnName(); + auto rhs_argument_column_name = rhs_argument.getColumnName(); + + bool lhs_argument_is_column = columns_names.contains(lhs_argument_column_name); + bool rhs_argument_is_column = columns_names.contains(rhs_argument_column_name); + + bool lhs_argument_is_constant = lhs_argument.isConstant(); + bool rhs_argument_is_constant = rhs_argument.isConstant(); + + RPNBuilderTreeNode * constant_node = nullptr; + + if (lhs_argument_is_column && rhs_argument_is_constant) + constant_node = &rhs_argument; + else if (lhs_argument_is_constant && rhs_argument_is_column) + constant_node = &lhs_argument; + else + return false; + + Field output_value; + DataTypePtr output_type; + if (!constant_node->tryGetConstant(output_value, output_type)) + return false; + + const auto type = output_value.getType(); + + /// check the value with respect to threshold + if (type == Field::Types::UInt64) + { + const auto value = output_value.get(); + return value > threshold; + } + else if (type == Field::Types::Int64) + { + const auto value = output_value.get(); + return value < -threshold || threshold < value; + } + else if (type == Field::Types::Float64) + { + const auto value = output_value.get(); + return value < threshold || threshold < value; + } + + return false; +} void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context) const { @@ -264,13 +269,23 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree && cond.table_columns.size() < queried_columns.size(); if (cond.viable) + cond.good = isConditionGood(node, table_columns); + + if (where_optimizer_context.use_statistic) + { + cond.good = cond.viable; + cond.selectivity = estimator.estimateSelectivity(node); + } if (where_optimizer_context.move_primary_key_columns_to_end_of_prewhere) { + /// Consider all conditions good with this setting enabled. + cond.good = cond.viable; /// Find min position in PK of any column that is used in this condition. 
cond.min_position_in_primary_key = findMinPosition(cond.table_columns, primary_key_names_positions); } + LOG_DEBUG(log, "node {}, min pos : {}", node.getASTNode()->dumpTree(), cond.min_position_in_primary_key); res.emplace_back(std::move(cond)); } diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index f5e6b6fbd49..e64ae1ba89c 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -71,8 +71,11 @@ private: /// Can condition be moved to prewhere? bool viable = false; + /// Does the condition presumably have good selectivity? + bool good = false; + /// the lower the better - Float64 selectivity = 0; + Float64 selectivity = 1.0; /// Does the condition contain primary key column? /// If so, it is better to move it further to the end of PREWHERE chain depending on minimal position in PK of any @@ -81,7 +84,7 @@ private: auto tuple() const { - return std::make_tuple(!viable, selectivity, -min_position_in_primary_key, columns_size, table_columns.size()); + return std::make_tuple(!viable, !good, -min_position_in_primary_key, selectivity, columns_size, table_columns.size()); } /// Is condition a better candidate for moving to PREWHERE? @@ -100,6 +103,7 @@ private: bool move_all_conditions_to_prewhere = false; bool move_primary_key_columns_to_end_of_prewhere = false; bool is_final = false; + bool use_statistic = false; }; struct OptimizeResult diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index a3e15f0793c..c096a182f2b 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -24,6 +24,10 @@ ALTER DROP INDEX ['DROP INDEX'] TABLE ALTER INDEX ALTER MATERIALIZE INDEX ['MATERIALIZE INDEX'] TABLE ALTER INDEX ALTER CLEAR INDEX ['CLEAR INDEX'] TABLE ALTER INDEX ALTER INDEX ['INDEX'] \N ALTER TABLE +ALTER ADD STATISTIC ['ALTER ADD STATISTIC'] TABLE ALTER STATISTIC +ALTER DROP STATISTIC ['ALTER DROP STATISTIC'] TABLE ALTER STATISTIC +ALTER MATERIALIZE STATISTIC ['ALTER MATERIALIZE STATISTIC'] TABLE ALTER STATISTIC +ALTER STATISTIC ['STATISTIC'] \N ALTER TABLE ALTER ADD PROJECTION ['ADD PROJECTION'] TABLE ALTER PROJECTION ALTER DROP PROJECTION ['DROP PROJECTION'] TABLE ALTER PROJECTION ALTER MATERIALIZE PROJECTION ['MATERIALIZE PROJECTION'] TABLE ALTER PROJECTION diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 5b678537248..f740753382f 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -298,7 +298,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 
'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM PULLING REPLICATION LOG' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 136, 'SYSTEM FLUSH' = 137, 'SYSTEM THREAD FUZZER' = 138, 'SYSTEM UNFREEZE' = 139, 'SYSTEM FAILPOINT' = 140, 'SYSTEM LISTEN' = 141, 'SYSTEM' = 142, 'dictGet' = 143, 'displaySecretsInShowAndSelect' = 144, 'addressToLine' = 145, 'addressToLineWithInlines' = 146, 'addressToSymbol' = 147, 'demangle' = 148, 'INTROSPECTION' = 149, 'FILE' = 150, 'URL' = 151, 'REMOTE' = 152, 'MONGO' = 153, 'REDIS' = 154, 'MEILISEARCH' = 155, 'MYSQL' = 156, 'POSTGRES' = 157, 'SQLITE' = 158, 'ODBC' = 159, 'JDBC' = 160, 'HDFS' = 161, 'S3' = 162, 'HIVE' = 
163, 'AZURE' = 164, 'SOURCES' = 165, 'CLUSTER' = 166, 'ALL' = 167, 'NONE' = 168), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD STATISTIC' = 26, 'ALTER DROP STATISTIC' = 27, 'ALTER MATERIALIZE STATISTIC' = 28, 'ALTER STATISTIC' = 29, 'ALTER ADD PROJECTION' = 30, 'ALTER DROP PROJECTION' = 31, 'ALTER MATERIALIZE PROJECTION' = 32, 'ALTER CLEAR PROJECTION' = 33, 'ALTER PROJECTION' = 34, 'ALTER ADD CONSTRAINT' = 35, 'ALTER DROP CONSTRAINT' = 36, 'ALTER CONSTRAINT' = 37, 'ALTER TTL' = 38, 'ALTER MATERIALIZE TTL' = 39, 'ALTER SETTINGS' = 40, 'ALTER MOVE PARTITION' = 41, 'ALTER FETCH PARTITION' = 42, 'ALTER FREEZE PARTITION' = 43, 'ALTER DATABASE SETTINGS' = 44, 'ALTER NAMED COLLECTION' = 45, 'ALTER TABLE' = 46, 'ALTER DATABASE' = 47, 'ALTER VIEW REFRESH' = 48, 'ALTER VIEW MODIFY QUERY' = 49, 'ALTER VIEW' = 50, 'ALTER' = 51, 'CREATE DATABASE' = 52, 'CREATE TABLE' = 53, 'CREATE VIEW' = 54, 'CREATE DICTIONARY' = 55, 'CREATE TEMPORARY TABLE' = 56, 'CREATE ARBITRARY TEMPORARY TABLE' = 57, 'CREATE FUNCTION' = 58, 'CREATE NAMED COLLECTION' = 59, 'CREATE' = 60, 'DROP DATABASE' = 61, 'DROP TABLE' = 62, 'DROP VIEW' = 63, 'DROP DICTIONARY' = 64, 'DROP FUNCTION' = 65, 'DROP NAMED COLLECTION' = 66, 'DROP' = 67, 'UNDROP TABLE' = 68, 'TRUNCATE' = 69, 'OPTIMIZE' = 70, 'BACKUP' = 71, 'KILL QUERY' = 72, 'KILL TRANSACTION' = 73, 'MOVE PARTITION BETWEEN SHARDS' = 74, 'CREATE USER' = 75, 'ALTER USER' = 76, 'DROP USER' = 77, 'CREATE ROLE' = 78, 'ALTER ROLE' = 79, 'DROP ROLE' = 80, 'ROLE ADMIN' = 81, 'CREATE ROW POLICY' = 82, 'ALTER ROW POLICY' = 83, 'DROP ROW POLICY' = 84, 'CREATE QUOTA' = 85, 'ALTER QUOTA' = 86, 'DROP QUOTA' = 87, 'CREATE SETTINGS PROFILE' = 88, 'ALTER SETTINGS PROFILE' = 89, 'DROP SETTINGS PROFILE' = 90, 'SHOW USERS' = 91, 'SHOW ROLES' = 92, 'SHOW ROW POLICIES' = 93, 'SHOW QUOTAS' = 94, 'SHOW SETTINGS PROFILES' = 95, 'SHOW ACCESS' = 96, 'ACCESS MANAGEMENT' = 97, 'SHOW NAMED COLLECTIONS' = 98, 'SHOW NAMED COLLECTIONS SECRETS' = 99, 'NAMED COLLECTION' = 100, 'NAMED COLLECTION ADMIN' = 101, 'SYSTEM SHUTDOWN' = 102, 'SYSTEM DROP DNS CACHE' = 103, 'SYSTEM DROP MARK CACHE' = 104, 'SYSTEM DROP UNCOMPRESSED CACHE' = 105, 'SYSTEM DROP MMAP CACHE' = 106, 'SYSTEM DROP QUERY CACHE' = 107, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 108, 'SYSTEM DROP FILESYSTEM CACHE' = 109, 'SYSTEM DROP SCHEMA CACHE' = 110, 'SYSTEM DROP S3 CLIENT CACHE' = 111, 'SYSTEM DROP CACHE' = 112, 'SYSTEM RELOAD CONFIG' = 113, 'SYSTEM RELOAD USERS' = 114, 'SYSTEM RELOAD DICTIONARY' = 115, 'SYSTEM RELOAD MODEL' = 116, 'SYSTEM RELOAD FUNCTION' = 117, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 118, 'SYSTEM RELOAD' = 119, 'SYSTEM RESTART DISK' = 120, 'SYSTEM MERGES' = 121, 'SYSTEM TTL MERGES' = 122, 'SYSTEM FETCHES' = 123, 'SYSTEM MOVES' = 124, 'SYSTEM PULLING REPLICATION LOG' = 125, 'SYSTEM DISTRIBUTED SENDS' = 126, 'SYSTEM REPLICATED SENDS' = 127, 'SYSTEM SENDS' = 128, 'SYSTEM REPLICATION QUEUES' = 129, 'SYSTEM DROP REPLICA' = 130, 'SYSTEM SYNC REPLICA' 
= 131, 'SYSTEM RESTART REPLICA' = 132, 'SYSTEM RESTORE REPLICA' = 133, 'SYSTEM WAIT LOADING PARTS' = 134, 'SYSTEM SYNC DATABASE REPLICA' = 135, 'SYSTEM SYNC TRANSACTION LOG' = 136, 'SYSTEM SYNC FILE CACHE' = 137, 'SYSTEM FLUSH DISTRIBUTED' = 138, 'SYSTEM FLUSH LOGS' = 139, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 140, 'SYSTEM FLUSH' = 141, 'SYSTEM THREAD FUZZER' = 142, 'SYSTEM UNFREEZE' = 143, 'SYSTEM FAILPOINT' = 144, 'SYSTEM LISTEN' = 145, 'SYSTEM' = 146, 'dictGet' = 147, 'displaySecretsInShowAndSelect' = 148, 'addressToLine' = 149, 'addressToLineWithInlines' = 150, 'addressToSymbol' = 151, 'demangle' = 152, 'INTROSPECTION' = 153, 'FILE' = 154, 'URL' = 155, 'REMOTE' = 156, 'MONGO' = 157, 'REDIS' = 158, 'MEILISEARCH' = 159, 'MYSQL' = 160, 'POSTGRES' = 161, 'SQLITE' = 162, 'ODBC' = 163, 'JDBC' = 164, 'HDFS' = 165, 'S3' = 166, 'HIVE' = 167, 'AZURE' = 168, 'SOURCES' = 169, 'CLUSTER' = 170, 'ALL' = 171, 'NONE' = 172), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -586,10 +586,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM 
SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM PULLING REPLICATION LOG' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 136, 'SYSTEM FLUSH' = 137, 'SYSTEM THREAD FUZZER' = 138, 'SYSTEM UNFREEZE' = 139, 'SYSTEM FAILPOINT' = 140, 'SYSTEM LISTEN' = 141, 'SYSTEM' = 142, 'dictGet' = 143, 'displaySecretsInShowAndSelect' = 144, 'addressToLine' = 145, 'addressToLineWithInlines' = 146, 'addressToSymbol' = 147, 'demangle' = 148, 'INTROSPECTION' = 149, 'FILE' = 150, 'URL' = 151, 'REMOTE' = 152, 'MONGO' = 153, 'REDIS' = 154, 'MEILISEARCH' = 155, 'MYSQL' = 156, 'POSTGRES' = 157, 'SQLITE' = 158, 'ODBC' = 159, 'JDBC' = 160, 'HDFS' = 161, 'S3' = 162, 'HIVE' = 163, 'AZURE' = 164, 'SOURCES' = 165, 'CLUSTER' = 166, 'ALL' = 167, 'NONE' = 168), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD STATISTIC' = 26, 'ALTER DROP STATISTIC' = 27, 'ALTER MATERIALIZE STATISTIC' = 28, 'ALTER STATISTIC' = 29, 'ALTER ADD PROJECTION' = 30, 'ALTER DROP PROJECTION' = 31, 'ALTER MATERIALIZE PROJECTION' = 32, 'ALTER CLEAR PROJECTION' = 33, 'ALTER PROJECTION' = 34, 'ALTER ADD CONSTRAINT' = 35, 'ALTER DROP CONSTRAINT' = 36, 'ALTER CONSTRAINT' = 37, 'ALTER TTL' = 38, 'ALTER MATERIALIZE TTL' = 39, 'ALTER SETTINGS' = 40, 'ALTER MOVE PARTITION' = 41, 'ALTER FETCH PARTITION' = 42, 'ALTER FREEZE PARTITION' = 43, 'ALTER DATABASE SETTINGS' = 44, 'ALTER NAMED COLLECTION' = 45, 'ALTER TABLE' = 46, 'ALTER DATABASE' = 47, 'ALTER VIEW REFRESH' = 48, 'ALTER VIEW MODIFY QUERY' = 49, 'ALTER VIEW' = 50, 'ALTER' = 51, 'CREATE DATABASE' = 52, 'CREATE TABLE' = 53, 'CREATE VIEW' = 54, 'CREATE DICTIONARY' = 55, 'CREATE TEMPORARY TABLE' = 56, 'CREATE ARBITRARY TEMPORARY TABLE' = 57, 'CREATE FUNCTION' = 58, 'CREATE NAMED COLLECTION' = 59, 'CREATE' = 60, 'DROP DATABASE' = 61, 'DROP TABLE' = 62, 'DROP VIEW' = 63, 'DROP DICTIONARY' = 64, 'DROP FUNCTION' = 65, 'DROP NAMED 
COLLECTION' = 66, 'DROP' = 67, 'UNDROP TABLE' = 68, 'TRUNCATE' = 69, 'OPTIMIZE' = 70, 'BACKUP' = 71, 'KILL QUERY' = 72, 'KILL TRANSACTION' = 73, 'MOVE PARTITION BETWEEN SHARDS' = 74, 'CREATE USER' = 75, 'ALTER USER' = 76, 'DROP USER' = 77, 'CREATE ROLE' = 78, 'ALTER ROLE' = 79, 'DROP ROLE' = 80, 'ROLE ADMIN' = 81, 'CREATE ROW POLICY' = 82, 'ALTER ROW POLICY' = 83, 'DROP ROW POLICY' = 84, 'CREATE QUOTA' = 85, 'ALTER QUOTA' = 86, 'DROP QUOTA' = 87, 'CREATE SETTINGS PROFILE' = 88, 'ALTER SETTINGS PROFILE' = 89, 'DROP SETTINGS PROFILE' = 90, 'SHOW USERS' = 91, 'SHOW ROLES' = 92, 'SHOW ROW POLICIES' = 93, 'SHOW QUOTAS' = 94, 'SHOW SETTINGS PROFILES' = 95, 'SHOW ACCESS' = 96, 'ACCESS MANAGEMENT' = 97, 'SHOW NAMED COLLECTIONS' = 98, 'SHOW NAMED COLLECTIONS SECRETS' = 99, 'NAMED COLLECTION' = 100, 'NAMED COLLECTION ADMIN' = 101, 'SYSTEM SHUTDOWN' = 102, 'SYSTEM DROP DNS CACHE' = 103, 'SYSTEM DROP MARK CACHE' = 104, 'SYSTEM DROP UNCOMPRESSED CACHE' = 105, 'SYSTEM DROP MMAP CACHE' = 106, 'SYSTEM DROP QUERY CACHE' = 107, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 108, 'SYSTEM DROP FILESYSTEM CACHE' = 109, 'SYSTEM DROP SCHEMA CACHE' = 110, 'SYSTEM DROP S3 CLIENT CACHE' = 111, 'SYSTEM DROP CACHE' = 112, 'SYSTEM RELOAD CONFIG' = 113, 'SYSTEM RELOAD USERS' = 114, 'SYSTEM RELOAD DICTIONARY' = 115, 'SYSTEM RELOAD MODEL' = 116, 'SYSTEM RELOAD FUNCTION' = 117, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 118, 'SYSTEM RELOAD' = 119, 'SYSTEM RESTART DISK' = 120, 'SYSTEM MERGES' = 121, 'SYSTEM TTL MERGES' = 122, 'SYSTEM FETCHES' = 123, 'SYSTEM MOVES' = 124, 'SYSTEM PULLING REPLICATION LOG' = 125, 'SYSTEM DISTRIBUTED SENDS' = 126, 'SYSTEM REPLICATED SENDS' = 127, 'SYSTEM SENDS' = 128, 'SYSTEM REPLICATION QUEUES' = 129, 'SYSTEM DROP REPLICA' = 130, 'SYSTEM SYNC REPLICA' = 131, 'SYSTEM RESTART REPLICA' = 132, 'SYSTEM RESTORE REPLICA' = 133, 'SYSTEM WAIT LOADING PARTS' = 134, 'SYSTEM SYNC DATABASE REPLICA' = 135, 'SYSTEM SYNC TRANSACTION LOG' = 136, 'SYSTEM SYNC FILE CACHE' = 137, 'SYSTEM FLUSH DISTRIBUTED' = 138, 'SYSTEM FLUSH LOGS' = 139, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 140, 'SYSTEM FLUSH' = 141, 'SYSTEM THREAD FUZZER' = 142, 'SYSTEM UNFREEZE' = 143, 'SYSTEM FAILPOINT' = 144, 'SYSTEM LISTEN' = 145, 'SYSTEM' = 146, 'dictGet' = 147, 'displaySecretsInShowAndSelect' = 148, 'addressToLine' = 149, 'addressToLineWithInlines' = 150, 'addressToSymbol' = 151, 'demangle' = 152, 'INTROSPECTION' = 153, 'FILE' = 154, 'URL' = 155, 'REMOTE' = 156, 'MONGO' = 157, 'REDIS' = 158, 'MEILISEARCH' = 159, 'MYSQL' = 160, 'POSTGRES' = 161, 'SQLITE' = 162, 'ODBC' = 163, 'JDBC' = 164, 'HDFS' = 165, 'S3' = 166, 'HIVE' = 167, 'AZURE' = 168, 'SOURCES' = 169, 'CLUSTER' = 170, 'ALL' = 171, 'NONE' = 172), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP 
PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM PULLING REPLICATION LOG' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 136, 'SYSTEM FLUSH' = 137, 'SYSTEM THREAD FUZZER' = 138, 'SYSTEM UNFREEZE' = 139, 'SYSTEM FAILPOINT' = 140, 'SYSTEM LISTEN' = 141, 'SYSTEM' = 142, 'dictGet' = 143, 'displaySecretsInShowAndSelect' = 144, 'addressToLine' = 145, 'addressToLineWithInlines' = 146, 'addressToSymbol' = 147, 'demangle' = 148, 'INTROSPECTION' = 149, 'FILE' = 150, 'URL' = 151, 'REMOTE' = 152, 'MONGO' = 153, 'REDIS' = 154, 'MEILISEARCH' = 155, 'MYSQL' = 156, 'POSTGRES' = 157, 'SQLITE' = 158, 'ODBC' = 159, 'JDBC' = 160, 'HDFS' = 161, 'S3' = 162, 'HIVE' = 163, 'AZURE' = 164, 'SOURCES' = 165, 
'CLUSTER' = 166, 'ALL' = 167, 'NONE' = 168)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD STATISTIC' = 26, 'ALTER DROP STATISTIC' = 27, 'ALTER MATERIALIZE STATISTIC' = 28, 'ALTER STATISTIC' = 29, 'ALTER ADD PROJECTION' = 30, 'ALTER DROP PROJECTION' = 31, 'ALTER MATERIALIZE PROJECTION' = 32, 'ALTER CLEAR PROJECTION' = 33, 'ALTER PROJECTION' = 34, 'ALTER ADD CONSTRAINT' = 35, 'ALTER DROP CONSTRAINT' = 36, 'ALTER CONSTRAINT' = 37, 'ALTER TTL' = 38, 'ALTER MATERIALIZE TTL' = 39, 'ALTER SETTINGS' = 40, 'ALTER MOVE PARTITION' = 41, 'ALTER FETCH PARTITION' = 42, 'ALTER FREEZE PARTITION' = 43, 'ALTER DATABASE SETTINGS' = 44, 'ALTER NAMED COLLECTION' = 45, 'ALTER TABLE' = 46, 'ALTER DATABASE' = 47, 'ALTER VIEW REFRESH' = 48, 'ALTER VIEW MODIFY QUERY' = 49, 'ALTER VIEW' = 50, 'ALTER' = 51, 'CREATE DATABASE' = 52, 'CREATE TABLE' = 53, 'CREATE VIEW' = 54, 'CREATE DICTIONARY' = 55, 'CREATE TEMPORARY TABLE' = 56, 'CREATE ARBITRARY TEMPORARY TABLE' = 57, 'CREATE FUNCTION' = 58, 'CREATE NAMED COLLECTION' = 59, 'CREATE' = 60, 'DROP DATABASE' = 61, 'DROP TABLE' = 62, 'DROP VIEW' = 63, 'DROP DICTIONARY' = 64, 'DROP FUNCTION' = 65, 'DROP NAMED COLLECTION' = 66, 'DROP' = 67, 'UNDROP TABLE' = 68, 'TRUNCATE' = 69, 'OPTIMIZE' = 70, 'BACKUP' = 71, 'KILL QUERY' = 72, 'KILL TRANSACTION' = 73, 'MOVE PARTITION BETWEEN SHARDS' = 74, 'CREATE USER' = 75, 'ALTER USER' = 76, 'DROP USER' = 77, 'CREATE ROLE' = 78, 'ALTER ROLE' = 79, 'DROP ROLE' = 80, 'ROLE ADMIN' = 81, 'CREATE ROW POLICY' = 82, 'ALTER ROW POLICY' = 83, 'DROP ROW POLICY' = 84, 'CREATE QUOTA' = 85, 'ALTER QUOTA' = 86, 'DROP QUOTA' = 87, 'CREATE SETTINGS PROFILE' = 88, 'ALTER SETTINGS PROFILE' = 89, 'DROP SETTINGS PROFILE' = 90, 'SHOW USERS' = 91, 'SHOW ROLES' = 92, 'SHOW ROW POLICIES' = 93, 'SHOW QUOTAS' = 94, 'SHOW SETTINGS PROFILES' = 95, 'SHOW ACCESS' = 96, 'ACCESS MANAGEMENT' = 97, 'SHOW NAMED COLLECTIONS' = 98, 'SHOW NAMED COLLECTIONS SECRETS' = 99, 'NAMED COLLECTION' = 100, 'NAMED COLLECTION ADMIN' = 101, 'SYSTEM SHUTDOWN' = 102, 'SYSTEM DROP DNS CACHE' = 103, 'SYSTEM DROP MARK CACHE' = 104, 'SYSTEM DROP UNCOMPRESSED CACHE' = 105, 'SYSTEM DROP MMAP CACHE' = 106, 'SYSTEM DROP QUERY CACHE' = 107, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 108, 'SYSTEM DROP FILESYSTEM CACHE' = 109, 'SYSTEM DROP SCHEMA CACHE' = 110, 'SYSTEM DROP S3 CLIENT CACHE' = 111, 'SYSTEM DROP CACHE' = 112, 'SYSTEM RELOAD CONFIG' = 113, 'SYSTEM RELOAD USERS' = 114, 'SYSTEM RELOAD DICTIONARY' = 115, 'SYSTEM RELOAD MODEL' = 116, 'SYSTEM RELOAD FUNCTION' = 117, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 118, 'SYSTEM RELOAD' = 119, 'SYSTEM RESTART DISK' = 120, 'SYSTEM MERGES' = 121, 'SYSTEM TTL MERGES' = 122, 'SYSTEM FETCHES' = 123, 'SYSTEM MOVES' = 124, 'SYSTEM PULLING REPLICATION LOG' = 125, 'SYSTEM DISTRIBUTED SENDS' = 126, 'SYSTEM REPLICATED SENDS' = 127, 'SYSTEM SENDS' = 128, 'SYSTEM REPLICATION QUEUES' = 129, 'SYSTEM DROP REPLICA' = 130, 'SYSTEM SYNC REPLICA' = 131, 'SYSTEM RESTART 
REPLICA' = 132, 'SYSTEM RESTORE REPLICA' = 133, 'SYSTEM WAIT LOADING PARTS' = 134, 'SYSTEM SYNC DATABASE REPLICA' = 135, 'SYSTEM SYNC TRANSACTION LOG' = 136, 'SYSTEM SYNC FILE CACHE' = 137, 'SYSTEM FLUSH DISTRIBUTED' = 138, 'SYSTEM FLUSH LOGS' = 139, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 140, 'SYSTEM FLUSH' = 141, 'SYSTEM THREAD FUZZER' = 142, 'SYSTEM UNFREEZE' = 143, 'SYSTEM FAILPOINT' = 144, 'SYSTEM LISTEN' = 145, 'SYSTEM' = 146, 'dictGet' = 147, 'displaySecretsInShowAndSelect' = 148, 'addressToLine' = 149, 'addressToLineWithInlines' = 150, 'addressToSymbol' = 151, 'demangle' = 152, 'INTROSPECTION' = 153, 'FILE' = 154, 'URL' = 155, 'REMOTE' = 156, 'MONGO' = 157, 'REDIS' = 158, 'MEILISEARCH' = 159, 'MYSQL' = 160, 'POSTGRES' = 161, 'SQLITE' = 162, 'ODBC' = 163, 'JDBC' = 164, 'HDFS' = 165, 'S3' = 166, 'HIVE' = 167, 'AZURE' = 168, 'SOURCES' = 169, 'CLUSTER' = 170, 'ALL' = 171, 'NONE' = 172)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' From 9cbcf3a72a30b6dfb23bfb7df29b60ea5337c40e Mon Sep 17 00:00:00 2001 From: Han Fei Date: Fri, 11 Aug 2023 21:02:44 +0200 Subject: [PATCH 0026/1097] fix build --- src/Storages/Statistic/Statistic.cpp | 35 +++++++++++++++++++++++++- src/Storages/Statistic/Statistic.h | 37 +++------------------------- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/src/Storages/Statistic/Statistic.cpp b/src/Storages/Statistic/Statistic.cpp index eaf7d828e1f..2b2ebde070a 100644 --- a/src/Storages/Statistic/Statistic.cpp +++ b/src/Storages/Statistic/Statistic.cpp @@ -2,7 +2,7 @@ #include #include #include -#include "Storages/MergeTree/RPNBuilder.h" +#include namespace DB { @@ -94,6 +94,39 @@ std::pair ConditionEstimator::extractBinaryOp(const RPNBui return std::make_pair(function_name, value); } +Float64 ConditionEstimator::estimateSelectivity(const RPNBuilderTreeNode & node) const +{ + auto col = extractSingleColumn(node); + if (col == std::nullopt || col == "") + { + return default_unknown_cond_factor; + } + auto it = column_estimators.find(col.value()); + ColumnEstimator estimator; + if (it != column_estimators.end()) + { + estimator = it->second; + } + auto [op, val] = extractBinaryOp(node, col.value()); + if (op == "equals") + { + if (val < - threshold || val > threshold) + return default_normal_cond_factor; + else + return default_good_cond_factor; + } + else if (op == "less" || op == "lessThan") + { + return estimator.estimateLess(val) / total_count; + } + else if (op == "greater" || op == "greaterThan") + { + return estimator.estimateLess(val) / total_count; + } + else + return default_unknown_cond_factor; +} + StatisticPtr TDigestCreator(const StatisticDescription & stat) { if (stat.column_names.size() != 1) diff --git a/src/Storages/Statistic/Statistic.h b/src/Storages/Statistic/Statistic.h index 2d16ef90ebe..460dea382fc 100644 --- a/src/Storages/Statistic/Statistic.h +++ b/src/Storages/Statistic/Statistic.h @@ -9,7 +9,6 @@ #include #include "Common/Exception.h" #include -#include "Storages/MergeTree/RPNBuilder.h" #include @@ -134,6 +133,8 @@ private: Creators creators; }; +class RPNBuilderTreeNode; + class ConditionEstimator { private: @@ -219,38 +220,8 @@ public: /// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ... 
/// Right now we only support simple condition like col = val / col < val - Float64 estimateSelectivity(const RPNBuilderTreeNode & node) const - { - auto col = extractSingleColumn(node); - if (col == std::nullopt || col == "") - { - return default_unknown_cond_factor; - } - auto it = column_estimators.find(col.value()); - ColumnEstimator estimator; - if (it != column_estimators.end()) - { - estimator = it->second; - } - auto [op, val] = extractBinaryOp(node, col.value()); - if (op == "equals") - { - if (val < - threshold || val > threshold) - return default_normal_cond_factor; - else - return default_good_cond_factor; - } - else if (op == "less" || op == "lessThan") - { - return estimator.estimateLess(val) / total_count; - } - else if (op == "greater" || op == "greaterThan") - { - return estimator.estimateLess(val) / total_count; - } - else - return default_unknown_cond_factor; - } + Float64 estimateSelectivity(const RPNBuilderTreeNode & node) const; + void merge(std::string part_name, StatisticPtr statistic) { column_estimators[statistic->columnName()].merge(part_name, statistic); From 82996ad33bca4e4236c2ac8f5f28ed69c0003de6 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Fri, 11 Aug 2023 23:33:06 +0200 Subject: [PATCH 0027/1097] add missing header --- src/Storages/Statistic/Statistic.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/Statistic/Statistic.cpp b/src/Storages/Statistic/Statistic.cpp index 2b2ebde070a..a8018458e5f 100644 --- a/src/Storages/Statistic/Statistic.cpp +++ b/src/Storages/Statistic/Statistic.cpp @@ -1,4 +1,6 @@ #include +#include + #include #include #include From 838a83d1082bd86c3bd1c9eda0cdf92fc5cbbea8 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Sat, 12 Aug 2023 00:53:33 +0200 Subject: [PATCH 0028/1097] remove logs --- src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 6c331a22b1d..8fffbdb5068 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -75,8 +75,6 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( if (it != column_sizes.end()) total_size_of_queried_columns += it->second; } - - LOG_DEBUG(log, "pk columns size : {}", metadata_snapshot->getPrimaryKey().column_names.size()); } void MergeTreeWhereOptimizer::optimize(SelectQueryInfo & select_query_info, const ContextPtr & context) const @@ -285,7 +283,6 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree /// Find min position in PK of any column that is used in this condition. 
cond.min_position_in_primary_key = findMinPosition(cond.table_columns, primary_key_names_positions); } - LOG_DEBUG(log, "node {}, min pos : {}", node.getASTNode()->dumpTree(), cond.min_position_in_primary_key); res.emplace_back(std::move(cond)); } From ec07032173b7b12d01e03b4570d05e4f1669c232 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Sat, 12 Aug 2023 01:33:19 +0200 Subject: [PATCH 0029/1097] fix test --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0936d092f99..98479368fae 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -132,7 +132,7 @@ class IColumn; M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \ M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \ \ - M(Bool, allow_statistic_optimize, false, "use statistic to optimize queries", 0) \ + M(Bool, allow_statistic_optimize, false, "Allows using statistic to optimize queries", 0) \ \ M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \ M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \ From 16275168cb515cab7b734cd56629f9d8f32961a6 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 23 Aug 2023 16:13:05 +0800 Subject: [PATCH 0030/1097] [bugfix] possible postgresql logical replication error: wrong type conversion --- src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index 2c97c92ba99..e9ffdebc583 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -263,7 +263,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( "attnotnull AS not_null, attndims AS dims, atttypid as type_id, atttypmod as type_modifier " "FROM pg_attribute " "WHERE attrelid = (SELECT oid FROM pg_class WHERE {}) " - "AND NOT attisdropped AND attnum > 0", where); + "AND NOT attisdropped AND attnum > 0 order by attnum asc", where); auto postgres_table_with_schema = postgres_schema.empty() ? postgres_table : doubleQuoteString(postgres_schema) + '.' &#13;
+ doubleQuoteString(postgres_table); table.physical_columns = readNamesAndTypesList(tx, postgres_table_with_schema, query, use_nulls, false); From c29261e5b45b63e0df989f522a62fba5e535afde Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 24 Aug 2023 14:09:42 +0200 Subject: [PATCH 0031/1097] support estimate by t-digest --- src/AggregateFunctions/QuantileTDigest.h | 35 +++++++++++++-- src/Interpreters/MutationsInterpreter.cpp | 3 +- .../MergeTree/MergeTreeWhereOptimizer.cpp | 1 - src/Storages/Statistic/Statistic.cpp | 5 --- src/Storages/Statistic/Statistic.h | 4 +- src/Storages/Statistic/tests/gtest_stats.cpp | 44 +++++++++++++++++++ src/Storages/StatisticsDescription.cpp | 14 ++++-- src/Storages/StatisticsDescription.h | 4 +- 8 files changed, 90 insertions(+), 20 deletions(-) create mode 100644 src/Storages/Statistic/tests/gtest_stats.cpp diff --git a/src/AggregateFunctions/QuantileTDigest.h b/src/AggregateFunctions/QuantileTDigest.h index 8706f77c12d..58d19299a8e 100644 --- a/src/AggregateFunctions/QuantileTDigest.h +++ b/src/AggregateFunctions/QuantileTDigest.h @@ -337,12 +337,39 @@ public: Float64 getCountLessThan(Float64 value) const { + bool first = true; + Count sum = 0; + Count prev_count = 0; + Float64 prev_x = 0; + Value prev_mean = 0; - ///Count sum = 0; - ///Value prev_mean = centroids.front().mean; - ///Count prev_count = centroids.front().count; + for (const auto & c : centroids) + { + std::cerr << "c "<< c.mean << " "<< c.count << std::endl; + Float64 current_x = sum + c.count * 0.5; + if (c.mean >= value) + { + /// value is smaller than any value. + if (first) + return 0; - return value; + Float64 left = prev_x + 0.5 * (prev_count == 1); + Float64 right = current_x - 0.5 * (c.count == 1); + return checkOverflow(interpolate( + static_cast(value), + prev_mean, + static_cast(left), + c.mean, + static_cast(right))); + } + sum += c.count; + prev_mean = c.mean; + prev_count = c.count; + prev_x = current_x; + first = false; + } + /// count is larger than any value. + return count; } /** Calculates the quantile q [0, 1] based on the digest. 
diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 22105f063fa..86082816ff1 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -716,8 +716,7 @@ void MutationsInterpreter::prepare(bool dry_run) if (it == std::cend(statistics_desc)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown statistic: {}", command.statistic_name); - for (const auto & column : it->column_names) - dependencies.emplace(column, ColumnDependency::STATISTIC); + dependencies.emplace(it->column_name, ColumnDependency::STATISTIC); materialized_statistics.emplace(command.statistic_name); } else if (command.type == MutationCommand::MATERIALIZE_PROJECTION) diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 8fffbdb5068..df1844c2a87 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -10,7 +10,6 @@ #include #include #include -#include "Common/logger_useful.h" #include #include #include diff --git a/src/Storages/Statistic/Statistic.cpp b/src/Storages/Statistic/Statistic.cpp index a8018458e5f..f2358c9b221 100644 --- a/src/Storages/Statistic/Statistic.cpp +++ b/src/Storages/Statistic/Statistic.cpp @@ -131,11 +131,6 @@ Float64 ConditionEstimator::estimateSelectivity(const RPNBuilderTreeNode & node) StatisticPtr TDigestCreator(const StatisticDescription & stat) { - if (stat.column_names.size() != 1) - { - /// throw - } - /// TODO: check column data types. return StatisticPtr(new TDigestStatistic(stat)); } diff --git a/src/Storages/Statistic/Statistic.h b/src/Storages/Statistic/Statistic.h index 460dea382fc..b4d38a76a05 100644 --- a/src/Storages/Statistic/Statistic.h +++ b/src/Storages/Statistic/Statistic.h @@ -45,7 +45,7 @@ public: const String & columnName() const { - return statistics.column_names[0]; + return statistics.column_name; } /// const String& type() const = 0; /// virtual StatisticType statisticType() const = 0; @@ -93,7 +93,7 @@ public: void update(const Block & block) override { - const auto & column_with_type = block.getByName(statistics.column_names[0]); + const auto & column_with_type = block.getByName(statistics.column_name); size_t size = block.rows(); for (size_t i = 0; i < size; ++i) diff --git a/src/Storages/Statistic/tests/gtest_stats.cpp b/src/Storages/Statistic/tests/gtest_stats.cpp new file mode 100644 index 00000000000..48c2d2e1f5e --- /dev/null +++ b/src/Storages/Statistic/tests/gtest_stats.cpp @@ -0,0 +1,44 @@ +#include + +#include + +TEST(Statistic, TDigestLessThan) +{ + /// this is the simplest data which is consecutive integers. + /// so the estimated errors should be low. &#13;
+ + std::vector data; + data.reserve(100000); + for (int i = 0; i < 100000; i++) + data.push_back(i); + + auto test_less_than = [](const std::vector & data1, + const std::vector & v, + const std::vector & answers, + const std::vector & eps) + { + + DB::QuantileTDigest t_digest; + + for (int i = 0; i < data1.size(); i++) + t_digest.add(data1[i]); + t_digest.compress(); + + for (int i = 0; i < v.size(); i ++) + { + auto value = v[i]; + auto result = t_digest.getCountLessThan(value); + auto answer = answers[i]; + auto error = eps[i]; + ASSERT_LE(result, answer * (1 + error)); + ASSERT_GE(result, answer * (1 - error)); + } + }; + test_less_than(data, {-1, 1e9, 50000.0, 3000.0, 30.0}, {0, 100000, 50000, 3000, 30}, {0, 0, 0.001, 0.001, 0.001}); + + /// If we reversely construct the digest, the error is as bad as 5%. + std::reverse(data.begin(), data.end()); + test_less_than(data, {-1, 1e9, 50000.0, 3000.0, 30.0}, {0, 100000, 50000, 3000, 30}, {0, 0, 0.001, 0.001, 0.001}); + + +} diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index 50c75995de1..5a3ba84d2cc 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -41,14 +41,18 @@ StatisticDescription StatisticDescription::getStatisticFromAST(const ASTPtr & de stat.type = Poco::toLower(stat_definition->type->name); ASTPtr expr_list = extractKeyExpressionList(stat_definition->columns->clone()); + if (expr_list->children.size() != 1) + { + throw Exception(ErrorCodes::INCORRECT_QUERY, "Statistic must contain exactly one column"); + } for (const auto & ast : expr_list->children) { ASTIdentifier* ident = ast->as(); if (!ident || !columns.hasPhysical(ident->getColumnName())) throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column"); const auto & column = columns.get(ident->getColumnName()); - stat.column_names.push_back(column.name); - stat.data_types.push_back(column.type); + stat.column_name = column.name; + stat.data_type = column.type; } UNUSED(context); @@ -60,7 +64,8 @@ StatisticDescription::StatisticDescription(const StatisticDescription & other) : definition_ast(other.definition_ast ? 
other.definition_ast->clone() : nullptr) , name(other.name) , type(other.type) - , column_names(other.column_names) + , column_name(other.column_name) + , data_type(other.data_type) { } @@ -76,7 +81,8 @@ StatisticDescription & StatisticDescription::operator=(const StatisticDescriptio name = other.name; type = other.type; - column_names = other.column_names; + column_name = other.column_name; + data_type = other.data_type; return *this; } diff --git a/src/Storages/StatisticsDescription.h b/src/Storages/StatisticsDescription.h index 2cbce381990..531326eadaf 100644 --- a/src/Storages/StatisticsDescription.h +++ b/src/Storages/StatisticsDescription.h @@ -18,10 +18,10 @@ struct StatisticDescription String type; /// Names of statistic columns - Names column_names; + String column_name; /// Data types of statistic columns - DataTypes data_types; + DataTypePtr data_type; static StatisticDescription getStatisticFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context); From d80fd7c74cea624f71db788e08fe9171c1f80601 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 24 Aug 2023 14:20:34 +0200 Subject: [PATCH 0032/1097] small improvement --- src/Storages/Statistic/Statistic.cpp | 2 +- src/Storages/Statistic/Statistic.h | 7 +------ src/Storages/Statistic/tests/gtest_stats.cpp | 2 +- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/Storages/Statistic/Statistic.cpp b/src/Storages/Statistic/Statistic.cpp index f2358c9b221..b117ff6e603 100644 --- a/src/Storages/Statistic/Statistic.cpp +++ b/src/Storages/Statistic/Statistic.cpp @@ -123,7 +123,7 @@ Float64 ConditionEstimator::estimateSelectivity(const RPNBuilderTreeNode & node) } else if (op == "greater" || op == "greaterThan") { - return estimator.estimateLess(val) / total_count; + return estimator.estimateGreater(val) / total_count; } else return default_unknown_cond_factor; diff --git a/src/Storages/Statistic/Statistic.h b/src/Storages/Statistic/Statistic.h index b4d38a76a05..7857c43bd16 100644 --- a/src/Storages/Statistic/Statistic.h +++ b/src/Storages/Statistic/Statistic.h @@ -47,6 +47,7 @@ public: { return statistics.column_name; } + /// const String& type() const = 0; /// virtual StatisticType statisticType() const = 0; @@ -69,12 +70,6 @@ public: { } - struct Range - { - Float64 left, right; - }; - - /// FIXME: implement correct count estimate method. Float64 estimateLess(Float64 val) const { return data.getCountLessThan(val); diff --git a/src/Storages/Statistic/tests/gtest_stats.cpp b/src/Storages/Statistic/tests/gtest_stats.cpp index 48c2d2e1f5e..b3070040c5c 100644 --- a/src/Storages/Statistic/tests/gtest_stats.cpp +++ b/src/Storages/Statistic/tests/gtest_stats.cpp @@ -22,6 +22,7 @@ TEST(Statistic, TDigestLessThan) for (int i = 0; i < data1.size(); i++) t_digest.add(data1[i]); + t_digest.compress(); for (int i = 0; i < v.size(); i ++) @@ -36,7 +37,6 @@ TEST(Statistic, TDigestLessThan) }; test_less_than(data, {-1, 1e9, 50000.0, 3000.0, 30.0}, {0, 100000, 50000, 3000, 30}, {0, 0, 0.001, 0.001, 0.001}); - /// If we reversely construct the digest, the error is as bad as 5%. 
std::reverse(data.begin(), data.end()); test_less_than(data, {-1, 1e9, 50000.0, 3000.0, 30.0}, {0, 100000, 50000, 3000, 30}, {0, 0, 0.001, 0.001, 0.001}); From 2330a28f7bd057b1f4fad13021d093a9e17de036 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Mon, 4 Sep 2023 15:51:00 +0200 Subject: [PATCH 0033/1097] support estimate by stat --- src/AggregateFunctions/QuantileTDigest.h | 5 +- src/Core/Settings.h | 1 + src/Interpreters/MutationsInterpreter.cpp | 8 +-- src/Parsers/ASTAlterQuery.cpp | 12 +---- src/Parsers/ASTAlterQuery.h | 1 - src/Parsers/ASTStatisticDeclaration.cpp | 16 +++--- src/Parsers/ASTStatisticDeclaration.h | 6 +-- src/Parsers/ParserAlterQuery.cpp | 14 ++---- src/Parsers/ParserCreateQuery.cpp | 15 ++---- src/Storages/AlterCommands.cpp | 49 +++++++------------ src/Storages/AlterCommands.h | 4 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 26 ++++++++++ src/Storages/MergeTree/IMergeTreeDataPart.h | 2 + src/Storages/MergeTree/MergeTreeData.cpp | 46 +++++++++++++++++ src/Storages/MergeTree/MergeTreeData.h | 2 + .../MergeTree/MergeTreeWhereOptimizer.cpp | 2 + src/Storages/MergeTree/MutateTask.cpp | 6 +-- src/Storages/MutationCommands.cpp | 3 +- src/Storages/MutationCommands.h | 2 +- src/Storages/Statistic/Statistic.cpp | 2 +- src/Storages/Statistic/Statistic.h | 41 ++++++++-------- src/Storages/StatisticsDescription.cpp | 41 +++++----------- src/Storages/StatisticsDescription.h | 4 +- .../0_stateless/02864_statistic_operate.sql | 47 ++++++++++++++++++ 24 files changed, 214 insertions(+), 141 deletions(-) create mode 100644 tests/queries/0_stateless/02864_statistic_operate.sql diff --git a/src/AggregateFunctions/QuantileTDigest.h b/src/AggregateFunctions/QuantileTDigest.h index 58d19299a8e..979c3f2af15 100644 --- a/src/AggregateFunctions/QuantileTDigest.h +++ b/src/AggregateFunctions/QuantileTDigest.h @@ -345,7 +345,7 @@ public: for (const auto & c : centroids) { - std::cerr << "c "<< c.mean << " "<< c.count << std::endl; + /// std::cerr << "c "<< c.mean << " "<< c.count << std::endl; Float64 current_x = sum + c.count * 0.5; if (c.mean >= value) { @@ -355,12 +355,13 @@ public: Float64 left = prev_x + 0.5 * (prev_count == 1); Float64 right = current_x - 0.5 * (c.count == 1); - return checkOverflow(interpolate( + Float64 result = checkOverflow(interpolate( static_cast(value), prev_mean, static_cast(left), c.mean, static_cast(right))); + return result; } sum += c.count; prev_mean = c.mean; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d27091b191e..f9312889265 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -134,6 +134,7 @@ class IColumn; M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \ \ M(Bool, allow_statistic_optimize, false, "Allows using statistic to optimize queries", 0) \ + M(Bool, allow_experimental_statistic, false, "Allows using statistic", 0) \ \ M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \ M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. 
Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \ diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index dc65f8a88d9..b9eb6ee9a96 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -728,13 +728,13 @@ void MutationsInterpreter::prepare(bool dry_run) std::cbegin(statistics_desc), std::end(statistics_desc), [&](const StatisticDescription & statistic) { - return statistic.name == command.statistic_name; + return statistic.column_name == command.statistic_column_name; }); if (it == std::cend(statistics_desc)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown statistic: {}", command.statistic_name); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown statistic column: {}", command.statistic_column_name); dependencies.emplace(it->column_name, ColumnDependency::STATISTIC); - materialized_statistics.emplace(command.statistic_name); + materialized_statistics.emplace(command.statistic_column_name); } else if (command.type == MutationCommand::MATERIALIZE_PROJECTION) { @@ -755,7 +755,7 @@ void MutationsInterpreter::prepare(bool dry_run) else if (command.type == MutationCommand::DROP_STATISTIC) { mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); - materialized_statistics.erase(command.statistic_name); + materialized_statistics.erase(command.statistic_column_name); } else if (command.type == MutationCommand::DROP_PROJECTION) { diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index e981392f97b..2db7bb93e8b 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -206,20 +206,12 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & settings.ostr << (settings.hilite ? hilite_keyword : "") << "ADD STATISTIC " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); statistic_decl->formatImpl(settings, state, frame); - - if (first) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FIRST " << (settings.hilite ? hilite_none : ""); - else if (statistic) /// AFTER - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " AFTER " << (settings.hilite ? hilite_none : ""); - statistic->formatImpl(settings, state, frame); - } } else if (type == ASTAlterCommand::DROP_STATISTIC) { settings.ostr << (settings.hilite ? hilite_keyword : "") << (clear_statistic ? "CLEAR " : "DROP ") << "INDEX " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); - statistic->formatImpl(settings, state, frame); + statistic_decl->formatImpl(settings, state, frame); if (partition) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : ""); @@ -229,7 +221,7 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & else if (type == ASTAlterCommand::MATERIALIZE_STATISTIC) { settings.ostr << (settings.hilite ? hilite_keyword : "") << "MATERIALIZE STATISTIC " << (settings.hilite ? hilite_none : ""); - statistic->formatImpl(settings, state, frame); + statistic_decl->formatImpl(settings, state, frame); if (partition) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? 
hilite_none : ""); diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index d58797b13c8..6be9bcf34a3 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -134,7 +134,6 @@ public: ASTPtr projection; ASTPtr statistic_decl; - ASTPtr statistic; /** Used in DROP PARTITION, ATTACH PARTITION FROM, UPDATE, DELETE queries. * The value or ID of the partition is stored here. diff --git a/src/Parsers/ASTStatisticDeclaration.cpp b/src/Parsers/ASTStatisticDeclaration.cpp index 53b20b167b7..196eb994fed 100644 --- a/src/Parsers/ASTStatisticDeclaration.cpp +++ b/src/Parsers/ASTStatisticDeclaration.cpp @@ -12,23 +12,19 @@ ASTPtr ASTStatisticDeclaration::clone() const { auto res = std::make_shared(); - res->name = name; + res->column_name = column_name; + res->type = type; - if (columns) - res->set(res->columns, columns->clone()); - if (type) - res->set(res->type, type->clone()); - return std::move(res); + return res; } -void ASTStatisticDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const +void ASTStatisticDeclaration::formatImpl(const FormatSettings & s, FormatState &, FormatStateStacked) const { - s.ostr << backQuoteIfNeed(name); + s.ostr << backQuoteIfNeed(column_name); s.ostr << " "; - columns->formatImpl(s, state, frame); s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : ""); - type->formatImpl(s, state, frame); + s.ostr << backQuoteIfNeed(type); } } diff --git a/src/Parsers/ASTStatisticDeclaration.h b/src/Parsers/ASTStatisticDeclaration.h index 0d5ab7723e9..7fba8872a94 100644 --- a/src/Parsers/ASTStatisticDeclaration.h +++ b/src/Parsers/ASTStatisticDeclaration.h @@ -12,9 +12,9 @@ class ASTFunction; class ASTStatisticDeclaration : public IAST { public: - String name; - IAST * columns; - ASTFunction * type; + String column_name; + /// We do not support to set bucket number for tdigest + String type; /** Get the text that identifies this element. 
*/ String getID(char) const override { return "Stat"; } diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index bb94d98d587..0051136fa1f 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -342,21 +342,13 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; command->type = ASTAlterCommand::ADD_STATISTIC; - - if (s_first.ignore(pos, expected)) - command->first = true; - else if (s_after.ignore(pos, expected)) - { - if (!parser_name.parse(pos, command->statistic, expected)) - return false; - } } else if (s_drop_statistic.ignore(pos, expected)) { if (s_if_exists.ignore(pos, expected)) command->if_exists = true; - if (!parser_name.parse(pos, command->statistic, expected)) + if (!parser_stat_decl.parse(pos, command->statistic_decl, expected)) return false; command->type = ASTAlterCommand::DROP_STATISTIC; @@ -367,7 +359,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (s_if_exists.ignore(pos, expected)) command->if_exists = true; - if (!parser_name.parse(pos, command->statistic, expected)) + if (!parser_stat_decl.parse(pos, command->statistic_decl, expected)) return false; command->type = ASTAlterCommand::DROP_STATISTIC; @@ -385,7 +377,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (s_if_exists.ignore(pos, expected)) command->if_exists = true; - if (!parser_name.parse(pos, command->statistic, expected)) + if (!parser_stat_decl.parse(pos, command->statistic_decl, expected)) return false; command->type = ASTAlterCommand::MATERIALIZE_STATISTIC; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index a39190f274b..065ba99b4c8 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -165,29 +165,24 @@ bool ParserStatisticDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & ParserKeyword s_type("TYPE"); ParserIdentifier name_p; - ParserDataType data_type_p; - ParserExpression expression_p; + ParserIdentifier type_p; ASTPtr name; - ASTPtr columns; + ASTPtr column; ASTPtr type; if (!name_p.parse(pos, name, expected)) return false; - if (!expression_p.parse(pos, columns, expected)) - return false; - if (!s_type.ignore(pos, expected)) return false; - if (!data_type_p.parse(pos, type, expected)) + if (!type_p.parse(pos, type, expected)) return false; auto stat = std::make_shared(); - stat->name = name->as().name(); - stat->set(stat->columns, columns); - stat->set(stat->type, type); + stat->column_name = name->as().name(); + stat->type = type->as().name(); node = stat; return true; diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 56de8048339..003e39a738a 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -242,13 +242,10 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ const auto & ast_stat_decl = command_ast->statistic_decl->as(); - command.statistic_name = ast_stat_decl.name; - - if (command_ast->statistic) - command.after_statistic_name = command_ast->statistic->as().name(); + command.statistic_column_name = ast_stat_decl.column_name; + command.statistic_type = ast_stat_decl.type; command.if_not_exists = command_ast->if_not_exists; - command.first = command_ast->first; return command; } @@ -316,7 +313,10 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ AlterCommand command; command.ast = command_ast->clone(); command.type = 
AlterCommand::DROP_STATISTIC; - command.statistic_name = command_ast->statistic->as().name(); + const auto & ast_stat_decl = command_ast->statistic_decl->as(); + + command.statistic_column_name = ast_stat_decl.column_name; + command.statistic_type = ast_stat_decl.type; command.if_exists = command_ast->if_exists; command.clear = command_ast->clear_statistic; @@ -589,18 +589,15 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) } else if (type == ADD_STATISTIC) { - if (std::any_of( + if (!if_not_exists && std::any_of( metadata.statistics.cbegin(), metadata.statistics.cend(), [this](const auto & statistic) { - return statistic.name == statistic_name; + return statistic.column_name == statistic_column_name && statistic.type == statistic_type; })) { - if (if_not_exists) - return; - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add statistic {} : statistic with this name already exists", statistic_name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add statistic {} with type {}: statistic on this column with this type already exists", statistic_column_name, statistic_type); } auto insert_it = metadata.statistics.end(); @@ -609,22 +606,6 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) if (first) insert_it = metadata.statistics.begin(); - if (!after_statistic_name.empty()) - { - insert_it = std::find_if( - metadata.statistics.begin(), - metadata.statistics.end(), - [this](const auto & statistic) - { - return statistic.name == after_statistic_name; - }); - - if (insert_it == metadata.statistics.end()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong statistic name. Cannot find statistic {} to insert after", backQuote(after_statistic_name)); - - ++insert_it; - } - metadata.statistics.emplace(insert_it, StatisticDescription::getStatisticFromAST(statistic_decl, metadata.columns, context)); } else if (type == DROP_STATISTIC) @@ -636,14 +617,14 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) metadata.statistics.end(), [this](const auto & statistic) { - return statistic.name == statistic_name; + return statistic.column_name == statistic_column_name && statistic.type == statistic_type; }); if (erase_it == metadata.statistics.end()) { if (if_exists) return; - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong statistic name. Cannot find statistic {} to drop", backQuote(statistic_name)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong statistic name. 
Cannot find statistic {} with type {} to drop", backQuote(statistic_column_name), statistic_type); } metadata.statistics.erase(erase_it); @@ -976,7 +957,15 @@ std::optional AlterCommand::tryConvertToMutationCommand(Storage } else if (type == DROP_STATISTIC) { + result.type = MutationCommand::Type::DROP_STATISTIC; + result.column_name = statistic_column_name; + if (clear) + result.clear = true; + if (partition) + result.partition = partition; + + result.predicate = nullptr; } else if (type == DROP_PROJECTION) { diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index eae538815f0..03a6fcefe22 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -121,8 +121,8 @@ struct AlterCommand String projection_name; ASTPtr statistic_decl = nullptr; - String after_statistic_name; - String statistic_name; + String statistic_column_name; + String statistic_type; /// For MODIFY TTL ASTPtr ttl = nullptr; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 6564fc67767..f6d7faa7d73 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -635,6 +636,31 @@ String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(bool with_subc return *minimum_size_column; } +Statistics IMergeTreeDataPart::loadStatistics() const +{ + const auto & metadata_snaphost = storage.getInMemoryMetadata(); + + auto total_statistics = MergeTreeStatisticFactory::instance().getMany(metadata_snaphost.getStatistics()); + + Statistics result; + for (auto & stat : total_statistics) + { + String file_name = stat->getFileName() + STAT_FILE_SUFFIX; + String file_path = fs::path(getDataPartStorage().getRelativePath()) / file_name; + + if (!metadata_manager->exists(file_name)) + { + LOG_INFO(storage.log, "Cannot find stats file {}", file_path); + continue; + } + auto stat_file = metadata_manager->read(file_name); + CompressedReadBuffer compressed_buffer(*stat_file); + stat->deserialize(compressed_buffer); + result.push_back(stat); + } + return result; +} + void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checksums, bool check_consistency) { assertOnDisk(); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 58609c77c41..2037cc23105 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -169,6 +169,8 @@ public: void remove(); + Statistics loadStatistics() const; + /// Initialize columns (from columns.txt if exists, or create from column files if not). /// Load various metadata into memory: checksums from checksums.txt, index if required, etc. 
void loadColumnsChecksumsIndexes(bool require_columns_checksums, bool check_consistency); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 810e21562b1..bcb42791da0 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -422,6 +422,52 @@ StoragePolicyPtr MergeTreeData::getStoragePolicy() const return storage_policy; } +ConditionEstimator MergeTreeData::getConditionEstimatorByPredicate(const SelectQueryInfo & query_info, ContextPtr local_context) const +{ + auto parts = getDataPartsVectorForInternalUsage(); + + auto metadata_snapshot = getInMemoryMetadataPtr(); + if (parts.empty()) + { + return {}; + } + + ASTPtr expression_ast; + Block virtual_columns_block = getBlockWithVirtualPartColumns(parts, true /* one_part */); + // + // Generate valid expressions for filtering + bool valid = VirtualColumnUtils::prepareFilterBlockWithQuery(query_info.query, local_context, virtual_columns_block, expression_ast); + + ConditionEstimator result; + PartitionPruner partition_pruner(metadata_snapshot, query_info, local_context, true /* strict */); + + if (partition_pruner.isUseless() && !valid) + { + /// Read all partitions. + for (const auto & part : parts) + { + auto stats = part->loadStatistics(); + /// TODO: We only have one stats file for every part. + for (const auto & stat : stats) + result.merge(part->info.getPartNameV1(), part->rows_count, stat); + } + } + else + { + for (const auto & part : parts) + { + if (!partition_pruner.canBePruned(*part)) + { + auto stats = part->loadStatistics(); + for (const auto & stat : stats) + result.merge(part->info.getPartNameV1(), part->rows_count, stat); + } + } + } + + return result; +} + bool MergeTreeData::supportsFinal() const { return merging_params.mode == MergingParams::Collapsing diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index e4801cffa36..8e42ecaf28d 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -422,6 +422,8 @@ public: bool supportsPrewhere() const override { return true; } + ConditionEstimator getConditionEstimatorByPredicate(const SelectQueryInfo &, ContextPtr) const override; + bool supportsFinal() const override; bool supportsSubcolumns() const override { return true; } diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index ad5eca02692..f5f07bdb598 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -273,6 +273,8 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree cond.good = cond.viable; cond.selectivity = estimator.estimateSelectivity(node); + + LOG_DEBUG(log, "Condition {} has selectivity {}", node.getASTNode()->dumpTree(), cond.selectivity); } if (where_optimizer_context.move_primary_key_columns_to_end_of_prewhere) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index d6ba6c56349..103ac4b3501 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -459,7 +459,7 @@ static std::set getStatisticsToRecalculate(const StorageMetadataPt const auto & stats = metadata_snapshot->getStatistics(); for (const auto & stat_desc : stats) { - if (materialized_stats.contains(stat_desc.name)) + if (materialized_stats.contains(stat_desc.column_name)) { stats_to_recalc.insert(stats_factory.get(stat_desc)); } @@ 
-1358,13 +1358,13 @@ private: const auto & statistics = ctx->metadata_snapshot->getStatistics(); for (const auto & stat : statistics) { - if (ctx->materialized_statistics.contains(stat.name)) + if (ctx->materialized_statistics.contains(stat.column_name)) { stats.push_back(MergeTreeStatisticFactory::instance().get(stat)); } else { - auto prefix = fmt::format("{}{}.", STAT_FILE_PREFIX, stat.name); + auto prefix = fmt::format("{}{}.", STAT_FILE_PREFIX, stat.column_name); auto it = ctx->source_part->checksums.files.upper_bound(prefix); while (it != ctx->source_part->checksums.files.end()) { diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index b00dca95c56..4c0f0d80a69 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -76,7 +77,7 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, res.type = MATERIALIZE_STATISTIC; res.partition = command->partition; res.predicate = nullptr; - res.statistic_name = command->statistic->as().name(); + res.statistic_column_name = command->statistic_decl->as().column_name; return res; } else if (command->type == ASTAlterCommand::MATERIALIZE_PROJECTION) diff --git a/src/Storages/MutationCommands.h b/src/Storages/MutationCommands.h index c9fa59bc309..ab925850ddb 100644 --- a/src/Storages/MutationCommands.h +++ b/src/Storages/MutationCommands.h @@ -53,7 +53,7 @@ struct MutationCommand /// For MATERIALIZE INDEX and PROJECTION and STATISTIC String index_name; String projection_name; - String statistic_name; + String statistic_column_name; /// For MATERIALIZE INDEX, UPDATE and DELETE. ASTPtr partition; diff --git a/src/Storages/Statistic/Statistic.cpp b/src/Storages/Statistic/Statistic.cpp index b117ff6e603..4ea2951b63d 100644 --- a/src/Storages/Statistic/Statistic.cpp +++ b/src/Storages/Statistic/Statistic.cpp @@ -143,7 +143,7 @@ void MergeTreeStatisticFactory::registerCreator(const std::string & stat_type, C MergeTreeStatisticFactory::MergeTreeStatisticFactory() { - registerCreator("t_digest", TDigestCreator); + registerCreator("tdigest", TDigestCreator); ///registerCreator("cm_sketch", CMSketchCreator); } diff --git a/src/Storages/Statistic/Statistic.h b/src/Storages/Statistic/Statistic.h index 7857c43bd16..7db4594eb5f 100644 --- a/src/Storages/Statistic/Statistic.h +++ b/src/Storages/Statistic/Statistic.h @@ -13,7 +13,6 @@ #include /// this is for user-defined statistic. 
-/// For auto collected statisic, we can use 'auto_statistic_' constexpr auto STAT_FILE_PREFIX = "statistic_"; constexpr auto STAT_FILE_SUFFIX = ".stat"; @@ -28,37 +27,38 @@ class IStatistic { public: explicit IStatistic(const StatisticDescription & stat_) - : statistics(stat_) + : stat(stat_) { } virtual ~IStatistic() = default; + /// statistic_[col_name]_[type] String getFileName() const { - return STAT_FILE_PREFIX + name(); - } - - const String & name() const - { - return statistics.name; + return STAT_FILE_PREFIX + columnName(); } const String & columnName() const { - return statistics.column_name; + return stat.column_name; } - /// const String& type() const = 0; - /// virtual StatisticType statisticType() const = 0; + const String & type() const + { + return stat.type; + } virtual void serialize(WriteBuffer & buf) = 0; + virtual void deserialize(ReadBuffer & buf) = 0; + virtual void update(const Block & block) = 0; + virtual UInt64 count() = 0; protected: - const StatisticDescription & statistics; + const StatisticDescription & stat; }; @@ -66,7 +66,7 @@ class TDigestStatistic : public IStatistic { QuantileTDigest data; public: - explicit TDigestStatistic(const StatisticDescription & stat) : IStatistic(stat) + explicit TDigestStatistic(const StatisticDescription & stat_) : IStatistic(stat_) { } @@ -88,7 +88,7 @@ public: void update(const Block & block) override { - const auto & column_with_type = block.getByName(statistics.column_name); + const auto & column_with_type = block.getByName(columnName()); size_t size = block.rows(); for (size_t i = 0; i < size; ++i) @@ -141,11 +141,11 @@ private: /// This is used to assume that condition is likely to have good selectivity. static constexpr auto threshold = 2; - UInt64 total_count; + UInt64 total_count = 0; struct PartColumnEstimator { - UInt64 part_count; + UInt64 part_count = 0; std::shared_ptr t_digest; @@ -184,6 +184,7 @@ private: { estimators[part_name].merge(statistic); } + Float64 estimateLess(Float64 val) const { if (estimators.empty()) @@ -210,18 +211,18 @@ private: std::pair extractBinaryOp(const RPNBuilderTreeNode & node, const std::string & column_name) const; public: - ConditionEstimator() = default; /// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ... 
/// Right now we only support simple condition like col = val / col < val Float64 estimateSelectivity(const RPNBuilderTreeNode & node) const; - void merge(std::string part_name, StatisticPtr statistic) + void merge(std::string part_name, UInt64 part_count, StatisticPtr statistic) { - column_estimators[statistic->columnName()].merge(part_name, statistic); + total_count += part_count; + if (statistic != nullptr) + column_estimators[statistic->columnName()].merge(part_name, statistic); } - }; diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index 5a3ba84d2cc..2dd8d7ab8e6 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -25,35 +25,20 @@ StatisticDescription StatisticDescription::getStatisticFromAST(const ASTPtr & de if (!stat_definition) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create statistic from non ASTStatisticDeclaration AST"); - if (stat_definition->name.empty()) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Statistic must have name in definition."); - - // type == nullptr => auto - if (!stat_definition->type) - throw Exception(ErrorCodes::INCORRECT_QUERY, "TYPE is required for statistics"); - - if (stat_definition->type->parameters && !stat_definition->type->parameters->children.empty()) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Statistics type cannot have parameters"); - StatisticDescription stat; stat.definition_ast = definition_ast->clone(); - stat.name = stat_definition->name; - stat.type = Poco::toLower(stat_definition->type->name); + stat.type = Poco::toLower(stat_definition->type); + if (stat.type != "tdigest") + throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect type name {}", stat.type); + String column_name = stat_definition->column_name; - ASTPtr expr_list = extractKeyExpressionList(stat_definition->columns->clone()); - if (expr_list->children.size() != 1) - { - throw Exception(ErrorCodes::INCORRECT_QUERY, "Statistic must contain exactly one column"); - } - for (const auto & ast : expr_list->children) - { - ASTIdentifier* ident = ast->as(); - if (!ident || !columns.hasPhysical(ident->getColumnName())) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column"); - const auto & column = columns.get(ident->getColumnName()); - stat.column_name = column.name; - stat.data_type = column.type; - } + if (!columns.hasPhysical(column_name)) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column name {}", column_name); + + const auto & column = columns.getPhysical(column_name); + stat.column_name = column.name; + /// TODO: check if it is numeric. + stat.data_type = column.type; UNUSED(context); @@ -62,7 +47,6 @@ StatisticDescription StatisticDescription::getStatisticFromAST(const ASTPtr & de StatisticDescription::StatisticDescription(const StatisticDescription & other) : definition_ast(other.definition_ast ? 
other.definition_ast->clone() : nullptr) - , name(other.name) , type(other.type) , column_name(other.column_name) , data_type(other.data_type) @@ -79,7 +63,6 @@ StatisticDescription & StatisticDescription::operator=(const StatisticDescriptio else definition_ast.reset(); - name = other.name; type = other.type; column_name = other.column_name; data_type = other.data_type; @@ -91,7 +74,7 @@ StatisticDescription & StatisticDescription::operator=(const StatisticDescriptio bool StatisticsDescriptions::has(const String & name) const { for (const auto & statistic : *this) - if (statistic.name == name) + if (statistic.column_name == name) return true; return false; } diff --git a/src/Storages/StatisticsDescription.h b/src/Storages/StatisticsDescription.h index 531326eadaf..ea05260b4d8 100644 --- a/src/Storages/StatisticsDescription.h +++ b/src/Storages/StatisticsDescription.h @@ -12,9 +12,7 @@ struct StatisticDescription /// Definition AST of statistic ASTPtr definition_ast; - /// Statistic name - String name; - + /// the type of statistic, right now it's only tdigest. String type; /// Names of statistic columns diff --git a/tests/queries/0_stateless/02864_statistic_operate.sql b/tests/queries/0_stateless/02864_statistic_operate.sql new file mode 100644 index 00000000000..5358fd7dde9 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistic_operate.sql @@ -0,0 +1,47 @@ +DROP TABLE IF EXISTS t1; + +SET allow_experimental_statistic = 1; +SET allow_statistic_optimize = 1; + +CREATE TABLE t1 +( + a Int64, + b Float64, + pk String, + STATISTIC a TYPE tdigest, + STATISTIC b TYPE tdigest +) Engine = MergeTree() ORDER BY pk; + +SHOW CREATE TABLE t1; + +INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'After insert'; +EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE b < 10 and a < 10; +SELECT count(*) FROM t1 WHERE b < 10 and a < 10; + +ALTER TABLE t1 DROP STATISTIC a TYPE tdigest; +ALTER TABLE t1 DROP STATISTIC b TYPE tdigest; + +SELECT 'After drop statistic'; +EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE b < 10 and a < 10; +SELECT count(*) FROM t1 WHERE b < 10 and a < 10; + +ALTER TABLE t1 ADD STATISTIC a TYPE tdigest; +ALTER TABLE t1 ADD STATISTIC b TYPE tdigest; + +ALTER TABLE t1 MATERIALIZE STATISTIC a TYPE tdigest; +ALTER TABLE t1 MATERIALIZE STATISTIC b TYPE tdigest; +INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'After materialize statistic'; +EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE b < 10 and a < 10; +SELECT count(*) FROM t1 WHERE b < 10 and a < 10; + +OPTIMIZE TABLE t1 FINAL; + +SELECT 'After merge'; +EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE b < 10 and a < 10; +SELECT count(*) FROM t1 WHERE b < 10 and a < 10; + +DROP TABLE IF EXISTS t1; From e1bc6cb0a702cf8f7aed19fa3a78d221ae6c6702 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Tue, 5 Sep 2023 15:24:21 -0300 Subject: [PATCH 0034/1097] increase background_fetches_pool_size to 16, background_schedule_pool_size to 512 --- .../en/operations/server-configuration-parameters/settings.md | 4 ++-- .../ru/operations/server-configuration-parameters/settings.md | 4 ++-- src/Core/ServerSettings.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 7d0ab494926..d62a80d60f3 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ 
b/docs/en/operations/server-configuration-parameters/settings.md @@ -74,7 +74,7 @@ The maximum number of threads that will be used for fetching data parts from ano Type: UInt64 -Default: 8 +Default: 16 ## background_merges_mutations_concurrency_ratio @@ -136,7 +136,7 @@ The maximum number of threads that will be used for constantly executing some li Type: UInt64 -Default: 128 +Default: 512 ## backup_threads diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 2c7f0b773e8..742cac639c9 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -993,7 +993,7 @@ ClickHouse использует потоки из глобального пул - Положительное целое число. -Значение по умолчанию: 128. +Значение по умолчанию: 512. ## background_fetches_pool_size {#background_fetches_pool_size} @@ -1003,7 +1003,7 @@ ClickHouse использует потоки из глобального пул - Положительное целое число. -Значение по умолчанию: 8. +Значение по умолчанию: 16. ## background_distributed_schedule_pool_size {#background_distributed_schedule_pool_size} diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 7e346f3596c..ecf6b4aa53e 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -85,10 +85,10 @@ namespace DB M(Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0) \ M(String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0) \ M(UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0) \ - M(UInt64, background_fetches_pool_size, 8, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0) \ + M(UInt64, background_fetches_pool_size, 16, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0) \ M(UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0) \ M(UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0) \ - M(UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \ + M(UInt64, background_schedule_pool_size, 512, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \ M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \ M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \ M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \ 
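As a quick way to confirm the new defaults on a running server, the values can be read back from `system.server_settings`; a minimal sketch (that system table is assumed to be available and is not touched by this patch):

``` sql
-- Check the two pool sizes this change bumps to 16 and 512 respectively.
SELECT name, value
FROM system.server_settings
WHERE name IN ('background_fetches_pool_size', 'background_schedule_pool_size');
```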
From ddcb64f39f17c74885e0a00a4a5f732b6af6c7b7 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Fri, 8 Sep 2023 02:27:17 +0200 Subject: [PATCH 0035/1097] update docs and refine statements --- .../mergetree-family/mergetree.md | 24 ++++- src/Interpreters/InterpreterCreateQuery.cpp | 10 +- src/Interpreters/MutationsInterpreter.cpp | 26 +++-- src/Parsers/ASTStatisticDeclaration.cpp | 21 +++- src/Parsers/ASTStatisticDeclaration.h | 6 +- src/Parsers/ParserCreateQuery.cpp | 9 +- src/Storages/AlterCommands.cpp | 67 ++++++------ src/Storages/AlterCommands.h | 2 +- .../MergeTree/registerStorageMergeTree.cpp | 8 +- src/Storages/MutationCommands.cpp | 6 +- src/Storages/MutationCommands.h | 2 +- src/Storages/Statistic/Statistic.cpp | 15 +-- src/Storages/Statistic/Statistic.h | 12 +-- src/Storages/StatisticsDescription.cpp | 100 ++++++++---------- src/Storages/StatisticsDescription.h | 22 ++-- .../02864_statistic_operate.reference | 22 ++++ .../0_stateless/02864_statistic_operate.sql | 18 ++-- 17 files changed, 209 insertions(+), 161 deletions(-) create mode 100644 tests/queries/0_stateless/02864_statistic_operate.reference diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 4f506126682..afccce2ed5a 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -44,7 +44,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2], ... PROJECTION projection_name_1 (SELECT [GROUP BY] [ORDER BY]), - PROJECTION projection_name_2 (SELECT [GROUP BY] [ORDER BY]) + PROJECTION projection_name_2 (SELECT [GROUP BY] [ORDER BY]), + ... + STATISTIC TYPE type1, + STATISTIC TYPE type2 ) ENGINE = MergeTree() ORDER BY expr [PARTITION BY expr] @@ -1353,3 +1356,22 @@ In this sample configuration: - `_part_uuid` — Unique part identifier (if enabled MergeTree setting `assign_part_uuids`). - `_partition_value` — Values (a tuple) of a `partition by` expression. - `_sample_factor` — Sample factor (from the query). + +## Column Statistics (Experimental) {#column-statistics} + +The statistic declaration is in the columns section of the `CREATE` query. + +``` sql +STATISTIC TYPE type +``` + +For tables from the `*MergeTree` family, statistics can be specified. + +These lightweight statistics aggregate information about distribution of values in columns. +They can be used for query optimization (At current time they are used for moving expressions to PREWHERE). + +#### Available Types of Column Statistics {#available-types-of-column-statistics} + +- `tdigest` + + Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch. 
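For illustration, a minimal end-to-end sketch of the workflow described above, mirroring the stateless test added in this patch series (the table and column names are arbitrary):

``` sql
SET allow_experimental_statistic = 1;
SET allow_statistic_optimize = 1;

CREATE TABLE stats_example
(
    a Int64,
    b Float64,
    pk String,
    STATISTIC a TYPE tdigest,
    STATISTIC b TYPE tdigest
) ENGINE = MergeTree() ORDER BY pk;

INSERT INTO stats_example SELECT number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000;

-- With the statistics in place, the optimizer can order PREWHERE conditions
-- by estimated selectivity rather than by column size alone.
EXPLAIN SYNTAX SELECT count() FROM stats_example WHERE b < 10 AND a < 10;
```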
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 9ddb8d83963..cf67b6c9231 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -459,8 +459,8 @@ ASTPtr InterpreterCreateQuery::formatStatistics(const StatisticsDescriptions & s { auto res = std::make_shared(); - for (const auto & statistic : statistics) - res->children.push_back(statistic.definition_ast->clone()); + for (const auto & definition_ast : statistics.definition_asts) + res->children.push_back(definition_ast->clone()); return res; } @@ -721,8 +721,10 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } if (create.columns_list->stats) for (const auto & statistic : create.columns_list->stats->children) - properties.stats.push_back( - StatisticDescription::getStatisticFromAST(statistic->clone(), properties.columns, getContext())); + { + auto stats = StatisticsDescriptions::getStatisticsFromAST(statistic->clone(), properties.columns, getContext()); + properties.stats.merge(stats); + } if (create.columns_list->projections) for (const auto & projection_ast : create.columns_list->projections->children) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index b9eb6ee9a96..961a4f7ac72 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -724,17 +724,20 @@ void MutationsInterpreter::prepare(bool dry_run) else if (command.type == MutationCommand::MATERIALIZE_STATISTIC) { mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); - auto it = std::find_if( - std::cbegin(statistics_desc), std::end(statistics_desc), - [&](const StatisticDescription & statistic) - { - return statistic.column_name == command.statistic_column_name; - }); - if (it == std::cend(statistics_desc)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown statistic column: {}", command.statistic_column_name); + for (const auto & stat_column_name: command.statistic_columns) + { + auto it = std::find_if( + std::cbegin(statistics_desc), std::end(statistics_desc), + [&](const StatisticDescription & statistic) + { + return statistic.column_name == stat_column_name; + }); + if (it == std::cend(statistics_desc)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown statistic column: {}", stat_column_name); - dependencies.emplace(it->column_name, ColumnDependency::STATISTIC); - materialized_statistics.emplace(command.statistic_column_name); + dependencies.emplace(it->column_name, ColumnDependency::STATISTIC); + materialized_statistics.emplace(stat_column_name); + } } else if (command.type == MutationCommand::MATERIALIZE_PROJECTION) { @@ -755,7 +758,8 @@ void MutationsInterpreter::prepare(bool dry_run) else if (command.type == MutationCommand::DROP_STATISTIC) { mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); - materialized_statistics.erase(command.statistic_column_name); + for (const auto & stat_column_name: command.statistic_columns) + materialized_statistics.erase(stat_column_name); } else if (command.type == MutationCommand::DROP_PROJECTION) { diff --git a/src/Parsers/ASTStatisticDeclaration.cpp b/src/Parsers/ASTStatisticDeclaration.cpp index 196eb994fed..0e20b020ab3 100644 --- a/src/Parsers/ASTStatisticDeclaration.cpp +++ b/src/Parsers/ASTStatisticDeclaration.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -12,17 +13,27 @@ ASTPtr ASTStatisticDeclaration::clone() const { auto res = 
std::make_shared(); - res->column_name = column_name; + res->set(res->columns, columns->clone()); res->type = type; return res; } - -void ASTStatisticDeclaration::formatImpl(const FormatSettings & s, FormatState &, FormatStateStacked) const +std::vector ASTStatisticDeclaration::getColumnNames() const { - s.ostr << backQuoteIfNeed(column_name); - s.ostr << " "; + std::vector result; + result.reserve(columns->children.size()); + for (const ASTPtr & column_ast : columns->children) + { + result.push_back(column_ast->as().name()); + } + return result; + +} + +void ASTStatisticDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const +{ + columns->formatImpl(s, state, frame); s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : ""); s.ostr << backQuoteIfNeed(type); } diff --git a/src/Parsers/ASTStatisticDeclaration.h b/src/Parsers/ASTStatisticDeclaration.h index 7fba8872a94..f936c93f2ba 100644 --- a/src/Parsers/ASTStatisticDeclaration.h +++ b/src/Parsers/ASTStatisticDeclaration.h @@ -12,13 +12,15 @@ class ASTFunction; class ASTStatisticDeclaration : public IAST { public: - String column_name; - /// We do not support to set bucket number for tdigest + IAST * columns; + /// TODO type should be a list of ASTFunction, for example, 'tdigest(256), hyperloglog(128)', etc. String type; /** Get the text that identifies this element. */ String getID(char) const override { return "Stat"; } + std::vector getColumnNames() const; + ASTPtr clone() const override; void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; }; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 3ac12212054..344d00beb4e 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -165,14 +165,13 @@ bool ParserStatisticDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & { ParserKeyword s_type("TYPE"); - ParserIdentifier name_p; + ParserList columns_p(std::make_unique(), std::make_unique(TokenType::Comma), false); ParserIdentifier type_p; - ASTPtr name; - ASTPtr column; + ASTPtr columns; ASTPtr type; - if (!name_p.parse(pos, name, expected)) + if (!columns_p.parse(pos, columns, expected)) return false; if (!s_type.ignore(pos, expected)) @@ -182,7 +181,7 @@ bool ParserStatisticDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & return false; auto stat = std::make_shared(); - stat->column_name = name->as().name(); + stat->set(stat->columns, columns); stat->type = type->as().name(); node = stat; diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 003e39a738a..c1f7711fce7 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -242,9 +242,8 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ const auto & ast_stat_decl = command_ast->statistic_decl->as(); - command.statistic_column_name = ast_stat_decl.column_name; + command.statistic_columns = ast_stat_decl.getColumnNames(); command.statistic_type = ast_stat_decl.type; - command.if_not_exists = command_ast->if_not_exists; return command; @@ -315,7 +314,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.type = AlterCommand::DROP_STATISTIC; const auto & ast_stat_decl = command_ast->statistic_decl->as(); - command.statistic_column_name = ast_stat_decl.column_name; + command.statistic_columns = ast_stat_decl.getColumnNames(); command.statistic_type = ast_stat_decl.type; 
command.if_exists = command_ast->if_exists; command.clear = command_ast->clear_statistic; @@ -589,45 +588,47 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) } else if (type == ADD_STATISTIC) { - if (!if_not_exists && std::any_of( - metadata.statistics.cbegin(), - metadata.statistics.cend(), - [this](const auto & statistic) - { - return statistic.column_name == statistic_column_name && statistic.type == statistic_type; - })) + /// TODO: Right now we assume there is only one type of statistics for simple implement. + for (const auto & statistic_column_name : statistic_columns) { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add statistic {} with type {}: statistic on this column with this type already exists", statistic_column_name, statistic_type); + if (!if_not_exists && std::any_of( + metadata.statistics.cbegin(), + metadata.statistics.cend(), + [&](const auto & statistic) + { + return statistic.column_name == statistic_column_name; + })) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add statistic {} with type {}: statistic on this column with this type already exists", statistic_column_name, statistic_type); + } } - auto insert_it = metadata.statistics.end(); - - /// insert the index in the beginning of the indices list - if (first) - insert_it = metadata.statistics.begin(); - - metadata.statistics.emplace(insert_it, StatisticDescription::getStatisticFromAST(statistic_decl, metadata.columns, context)); + auto stats = StatisticsDescriptions::getStatisticsFromAST(statistic_decl, metadata.columns, context); + metadata.statistics.merge(stats); } else if (type == DROP_STATISTIC) { if (!partition && !clear) { - auto erase_it = std::find_if( - metadata.statistics.begin(), - metadata.statistics.end(), - [this](const auto & statistic) - { - return statistic.column_name == statistic_column_name && statistic.type == statistic_type; - }); - - if (erase_it == metadata.statistics.end()) + for (const auto & stat_column_name : statistic_columns) { - if (if_exists) - return; - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong statistic name. Cannot find statistic {} with type {} to drop", backQuote(statistic_column_name), statistic_type); - } + auto erase_it = std::find_if( + metadata.statistics.begin(), + metadata.statistics.end(), + [stat_column_name](const auto & statistic) + { + return statistic.column_name == stat_column_name; + }); - metadata.statistics.erase(erase_it); + if (erase_it == metadata.statistics.end()) + { + if (if_exists) + return; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong statistic name. 
Cannot find statistic {} with type {} to drop", backQuote(stat_column_name), statistic_type); + } + LOG_INFO(&Poco::Logger::get("drop_stat"), "dropping statistic {}", erase_it->column_name); + metadata.statistics.erase(erase_it); + } } } else if (type == ADD_CONSTRAINT) @@ -958,7 +959,7 @@ std::optional AlterCommand::tryConvertToMutationCommand(Storage else if (type == DROP_STATISTIC) { result.type = MutationCommand::Type::DROP_STATISTIC; - result.column_name = statistic_column_name; + result.statistic_columns = statistic_columns; if (clear) result.clear = true; diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index 03a6fcefe22..f5b7c1c0063 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -121,7 +121,7 @@ struct AlterCommand String projection_name; ASTPtr statistic_decl = nullptr; - String statistic_column_name; + std::vector statistic_columns; String statistic_type; /// For MODIFY TTL diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index b93604bcac6..2093f667fcb 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -580,9 +580,11 @@ static StoragePtr create(const StorageFactory::Arguments & args) metadata.secondary_indices.push_back(IndexDescription::getIndexFromAST(index, columns, context)); if (args.query.columns_list && args.query.columns_list->stats) - for (const auto & stat : args.query.columns_list->stats->children) - metadata.statistics.push_back( - StatisticDescription::getStatisticFromAST(stat, columns, args.getContext())); + for (const auto & stat_ast : args.query.columns_list->stats->children) + { + auto stats = StatisticsDescriptions::getStatisticsFromAST(stat_ast, columns, args.getContext()); + metadata.statistics.merge(stats); + } if (args.query.columns_list && args.query.columns_list->projections) for (auto & projection_ast : args.query.columns_list->projections->children) diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index 4c0f0d80a69..46322f3ef05 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -77,7 +77,11 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, res.type = MATERIALIZE_STATISTIC; res.partition = command->partition; res.predicate = nullptr; - res.statistic_column_name = command->statistic_decl->as().column_name; + for (const ASTPtr & column_ast : command->statistic_decl->as().columns->children) + { + const auto & column = column_ast->as().getColumnName(); + res.statistic_columns.push_back(column); + } return res; } else if (command->type == ASTAlterCommand::MATERIALIZE_PROJECTION) diff --git a/src/Storages/MutationCommands.h b/src/Storages/MutationCommands.h index 2600112f744..014a227dff3 100644 --- a/src/Storages/MutationCommands.h +++ b/src/Storages/MutationCommands.h @@ -53,7 +53,7 @@ struct MutationCommand /// For MATERIALIZE INDEX and PROJECTION and STATISTIC String index_name = {}; String projection_name = {}; - String statistic_column_name = {}; + std::vector statistic_columns = {}; /// For MATERIALIZE INDEX, UPDATE and DELETE. 
ASTPtr partition = {}; diff --git a/src/Storages/Statistic/Statistic.cpp b/src/Storages/Statistic/Statistic.cpp index 4ea2951b63d..17aa6a76f1c 100644 --- a/src/Storages/Statistic/Statistic.cpp +++ b/src/Storages/Statistic/Statistic.cpp @@ -135,7 +135,7 @@ StatisticPtr TDigestCreator(const StatisticDescription & stat) return StatisticPtr(new TDigestStatistic(stat)); } -void MergeTreeStatisticFactory::registerCreator(const std::string & stat_type, Creator creator) +void MergeTreeStatisticFactory::registerCreator(StatisticType stat_type, Creator creator) { if (!creators.emplace(stat_type, std::move(creator)).second) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticFactory: the statistic creator type {} is not unique", stat_type); @@ -143,7 +143,7 @@ void MergeTreeStatisticFactory::registerCreator(const std::string & stat_type, C MergeTreeStatisticFactory::MergeTreeStatisticFactory() { - registerCreator("tdigest", TDigestCreator); + registerCreator(TDigest, TDigestCreator); ///registerCreator("cm_sketch", CMSketchCreator); } @@ -160,16 +160,7 @@ StatisticPtr MergeTreeStatisticFactory::get(const StatisticDescription & stat) c if (it == creators.end()) { throw Exception(ErrorCodes::INCORRECT_QUERY, - "Unknown Statistic type '{}'. Available types: {}", stat.type, - std::accumulate(creators.cbegin(), creators.cend(), std::string{}, - [] (auto && left, const auto & right) -> std::string - { - if (left.empty()) - return right.first; - else - return left + ", " + right.first; - }) - ); + "Unknown Statistic type '{}'. Available types: tdigest", stat.type); } return std::make_shared(stat); } diff --git a/src/Storages/Statistic/Statistic.h b/src/Storages/Statistic/Statistic.h index 7db4594eb5f..eb05649f0a6 100644 --- a/src/Storages/Statistic/Statistic.h +++ b/src/Storages/Statistic/Statistic.h @@ -43,10 +43,10 @@ public: return stat.column_name; } - const String & type() const - { - return stat.type; - } + //const String & type() const + //{ + // return stat.type; + //} virtual void serialize(WriteBuffer & buf) = 0; @@ -118,13 +118,13 @@ public: Statistics getMany(const std::vector & stats) const; - void registerCreator(const std::string & type, Creator creator); + void registerCreator(StatisticType type, Creator creator); protected: MergeTreeStatisticFactory(); private: - using Creators = std::unordered_map; + using Creators = std::unordered_map; Creators creators; }; diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index 2dd8d7ab8e6..0fff9581d57 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -10,6 +10,8 @@ #include #include +#include + namespace DB { @@ -19,58 +21,51 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; }; -StatisticDescription StatisticDescription::getStatisticFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context) +StatisticType StatisticDescription::stringToType(String type) +{ + if (type.empty()) + return TDigest; + if (type == "tdigest") + return TDigest; + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type: {}", type); +} + +StatisticsDescriptions StatisticsDescriptions::getStatisticsFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context) { const auto * stat_definition = definition_ast->as(); if (!stat_definition) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create statistic from non ASTStatisticDeclaration AST"); - StatisticDescription stat; - stat.definition_ast = 
definition_ast->clone(); - stat.type = Poco::toLower(stat_definition->type); - if (stat.type != "tdigest") - throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect type name {}", stat.type); - String column_name = stat_definition->column_name; + LOG_INFO(&Poco::Logger::get("stats_desc"), "stat_def is like {}", stat_definition->dumpTree()); - if (!columns.hasPhysical(column_name)) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column name {}", column_name); + StatisticsDescriptions stats; + for (const auto & column_ast : stat_definition->columns->children) + { + StatisticDescription stat; + stat.type = StatisticDescription::stringToType(Poco::toLower(stat_definition->type)); + String column_name = column_ast->as().name(); - const auto & column = columns.getPhysical(column_name); - stat.column_name = column.name; - /// TODO: check if it is numeric. - stat.data_type = column.type; + if (!columns.hasPhysical(column_name)) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column name {}", column_name); + + const auto & column = columns.getPhysical(column_name); + stat.column_name = column.name; + /// TODO: check if it is numeric. + stat.data_type = column.type; + stats.push_back(stat); + } + stats.definition_asts.push_back(definition_ast); + + if (stats.empty()) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Empty statistic column list"); + + LOG_INFO(&Poco::Logger::get("stats_desc"), "there are {} stats", stats.size()); UNUSED(context); - return stat; + return stats; } -StatisticDescription::StatisticDescription(const StatisticDescription & other) - : definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr) - , type(other.type) - , column_name(other.column_name) - , data_type(other.data_type) -{ -} - -StatisticDescription & StatisticDescription::operator=(const StatisticDescription & other) -{ - if (&other == this) - return *this; - - if (other.definition_ast) - definition_ast = other.definition_ast->clone(); - else - definition_ast.reset(); - - type = other.type; - column_name = other.column_name; - data_type = other.data_type; - - return *this; -} - - bool StatisticsDescriptions::has(const String & name) const { for (const auto & statistic : *this) @@ -79,31 +74,22 @@ bool StatisticsDescriptions::has(const String & name) const return false; } +void StatisticsDescriptions::merge(const StatisticsDescriptions & other) +{ + insert(end(), other.begin(), other.end()); + definition_asts.insert(definition_asts.end(), other.definition_asts.begin(), other.definition_asts.end()); +} + String StatisticsDescriptions::toString() const { if (empty()) return {}; ASTExpressionList list; - for (const auto & statistic : *this) - list.children.push_back(statistic.definition_ast); + for (const auto & ast : definition_asts) + list.children.push_back(ast); return serializeAST(list); } -StatisticsDescriptions StatisticsDescriptions::parse(const String & str, const ColumnsDescription & columns, ContextPtr context) -{ - StatisticsDescriptions result; - if (str.empty()) - return result; - - ParserStatisticDeclaration parser; - ASTPtr list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - - for (const auto & index : list->children) - result.emplace_back(StatisticDescription::getStatisticFromAST(index, columns, context)); - - return result; -} - } diff --git a/src/Storages/StatisticsDescription.h b/src/Storages/StatisticsDescription.h index ea05260b4d8..b018ce26665 100644 --- a/src/Storages/StatisticsDescription.h +++ b/src/Storages/StatisticsDescription.h @@ -7,13 
+7,15 @@ namespace DB { +enum StatisticType +{ + TDigest = 0, +}; + struct StatisticDescription { - /// Definition AST of statistic - ASTPtr definition_ast; - /// the type of statistic, right now it's only tdigest. - String type; + StatisticType type; /// Names of statistic columns String column_name; @@ -21,24 +23,22 @@ struct StatisticDescription /// Data types of statistic columns DataTypePtr data_type; - static StatisticDescription getStatisticFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context); - StatisticDescription() = default; - /// We need custom copy constructors because we don't want - /// unintentionaly share AST variables and modify them. - StatisticDescription(const StatisticDescription & other); - StatisticDescription & operator=(const StatisticDescription & other); + static StatisticType stringToType(String type); }; struct StatisticsDescriptions : public std::vector { + std::vector definition_asts; /// Stat with name exists bool has(const String & name) const; + /// merge with other Statistics + void merge(const StatisticsDescriptions & other); /// Convert description to string String toString() const; /// Parse description from string - static StatisticsDescriptions parse(const String & str, const ColumnsDescription & columns, ContextPtr context); + static StatisticsDescriptions getStatisticsFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context); }; } diff --git a/tests/queries/0_stateless/02864_statistic_operate.reference b/tests/queries/0_stateless/02864_statistic_operate.reference new file mode 100644 index 00000000000..424b16cb8a5 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistic_operate.reference @@ -0,0 +1,22 @@ +CREATE TABLE default.t1\n(\n `a` Int64,\n `b` Float64,\n `pk` String,\n STATISTIC a, b TYPE tdigest\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 +After insert +SELECT count() +FROM t1 +PREWHERE (a < 10) AND (b < 10) +10 +After drop statistic +SELECT count() +FROM t1 +PREWHERE (b < 10) AND (a < 10) +10 +After add statistic +After materialize statistic +SELECT count() +FROM t1 +PREWHERE (a < 10) AND (b < 10) +20 +After merge +SELECT count() +FROM t1 +PREWHERE (a < 10) AND (b < 10) +20 diff --git a/tests/queries/0_stateless/02864_statistic_operate.sql b/tests/queries/0_stateless/02864_statistic_operate.sql index 5358fd7dde9..6b74cc37e2a 100644 --- a/tests/queries/0_stateless/02864_statistic_operate.sql +++ b/tests/queries/0_stateless/02864_statistic_operate.sql @@ -8,8 +8,7 @@ CREATE TABLE t1 a Int64, b Float64, pk String, - STATISTIC a TYPE tdigest, - STATISTIC b TYPE tdigest + STATISTIC a, b TYPE tdigest, ) Engine = MergeTree() ORDER BY pk; SHOW CREATE TABLE t1; @@ -20,18 +19,21 @@ SELECT 'After insert'; EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE b < 10 and a < 10; SELECT count(*) FROM t1 WHERE b < 10 and a < 10; -ALTER TABLE t1 DROP STATISTIC a TYPE tdigest; -ALTER TABLE t1 DROP STATISTIC b TYPE tdigest; +ALTER TABLE t1 DROP STATISTIC a, b TYPE tdigest; SELECT 'After drop statistic'; EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE b < 10 and a < 10; SELECT count(*) FROM t1 WHERE b < 10 and a < 10; -ALTER TABLE t1 ADD STATISTIC a TYPE tdigest; -ALTER TABLE t1 ADD STATISTIC b TYPE tdigest; +--SHOW CREATE TABLE t1; -ALTER TABLE t1 MATERIALIZE STATISTIC a TYPE tdigest; -ALTER TABLE t1 MATERIALIZE STATISTIC b TYPE tdigest; +ALTER TABLE t1 ADD STATISTIC a, b TYPE tdigest; + +SELECT 'After add statistic'; + +--SHOW CREATE TABLE t1; + +ALTER TABLE 
t1 MATERIALIZE STATISTIC a, b TYPE tdigest; INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; SELECT 'After materialize statistic'; From f9abf164414ff2c69a135d7ee18b4f8f9a6b0bdc Mon Sep 17 00:00:00 2001 From: Han Fei Date: Sat, 9 Sep 2023 04:04:18 +0200 Subject: [PATCH 0036/1097] fix fast tests --- src/Interpreters/InterpreterCreateQuery.cpp | 7 +-- src/Storages/MergeTree/MergeTreeData.cpp | 3 ++ src/Storages/StatisticsDescription.cpp | 43 ++++++++++++++++--- src/Storages/StatisticsDescription.h | 2 + .../02864_statistic_operate.reference | 4 +- .../0_stateless/02864_statistic_operate.sql | 4 +- .../aspell-ignore/en/aspell-dict.txt | 1 + 7 files changed, 48 insertions(+), 16 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index cf67b6c9231..349ac683db9 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -457,12 +457,7 @@ ASTPtr InterpreterCreateQuery::formatIndices(const IndicesDescription & indices) ASTPtr InterpreterCreateQuery::formatStatistics(const StatisticsDescriptions & statistics) { - auto res = std::make_shared(); - - for (const auto & definition_ast : statistics.definition_asts) - res->children.push_back(definition_ast->clone()); - - return res; + return statistics.getAST(); } ASTPtr InterpreterCreateQuery::formatConstraints(const ConstraintsDescription & constraints) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 467a68b7190..c2512144fea 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -438,6 +438,9 @@ StoragePolicyPtr MergeTreeData::getStoragePolicy() const ConditionEstimator MergeTreeData::getConditionEstimatorByPredicate(const SelectQueryInfo & query_info, ContextPtr local_context) const { + if (!local_context->getSettings().allow_statistic_optimize) + return {}; + auto parts = getDataPartsVectorForInternalUsage(); auto metadata_snapshot = getInMemoryMetadataPtr(); diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index 0fff9581d57..097c2cd1ad5 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -30,6 +30,18 @@ StatisticType StatisticDescription::stringToType(String type) throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type: {}", type); } +namespace +{ + +String typeToString(StatisticType type) +{ + if (type == TDigest) + return "tdigest"; + return "unknown"; +} + +} + StatisticsDescriptions StatisticsDescriptions::getStatisticsFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context) { const auto * stat_definition = definition_ast->as(); @@ -54,7 +66,7 @@ StatisticsDescriptions StatisticsDescriptions::getStatisticsFromAST(const ASTPtr stat.data_type = column.type; stats.push_back(stat); } - stats.definition_asts.push_back(definition_ast); + /// stats.definition_asts.push_back(definition_ast); if (stats.empty()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Empty statistic column list"); @@ -77,7 +89,28 @@ bool StatisticsDescriptions::has(const String & name) const void StatisticsDescriptions::merge(const StatisticsDescriptions & other) { insert(end(), other.begin(), other.end()); - definition_asts.insert(definition_asts.end(), other.definition_asts.begin(), other.definition_asts.end()); + /// definition_asts.insert(definition_asts.end(), 
other.definition_asts.begin(), other.definition_asts.end()); +} + +ASTPtr StatisticsDescriptions::getAST() const +{ + + auto list = std::make_shared(); + /// for (const auto & ast : definition_asts) + /// list.children.push_back(ast); + + for (const auto & stat : *this) + { + auto stat_ast = std::make_shared(); + auto cols_ast = std::make_shared(); + auto col_ast = std::make_shared(stat.column_name); + cols_ast->children.push_back(col_ast); + stat_ast->set(stat_ast->columns, cols_ast); + stat_ast->type = typeToString(stat.type); + + list->children.push_back(stat_ast); + } + return list; } String StatisticsDescriptions::toString() const @@ -85,11 +118,7 @@ String StatisticsDescriptions::toString() const if (empty()) return {}; - ASTExpressionList list; - for (const auto & ast : definition_asts) - list.children.push_back(ast); - - return serializeAST(list); + return serializeAST(*getAST()); } } diff --git a/src/Storages/StatisticsDescription.h b/src/Storages/StatisticsDescription.h index b018ce26665..70a6c6bb34f 100644 --- a/src/Storages/StatisticsDescription.h +++ b/src/Storages/StatisticsDescription.h @@ -35,6 +35,8 @@ struct StatisticsDescriptions : public std::vector bool has(const String & name) const; /// merge with other Statistics void merge(const StatisticsDescriptions & other); + + ASTPtr getAST() const; /// Convert description to string String toString() const; /// Parse description from string diff --git a/tests/queries/0_stateless/02864_statistic_operate.reference b/tests/queries/0_stateless/02864_statistic_operate.reference index 424b16cb8a5..26be9b47b8e 100644 --- a/tests/queries/0_stateless/02864_statistic_operate.reference +++ b/tests/queries/0_stateless/02864_statistic_operate.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.t1\n(\n `a` Int64,\n `b` Float64,\n `pk` String,\n STATISTIC a, b TYPE tdigest\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 +CREATE TABLE default.t1\n(\n `a` Int64,\n `b` Float64,\n `pk` String,\n STATISTIC a TYPE tdigest,\n STATISTIC b TYPE tdigest\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 After insert SELECT count() FROM t1 @@ -9,7 +9,9 @@ SELECT count() FROM t1 PREWHERE (b < 10) AND (a < 10) 10 +CREATE TABLE default.t1\n(\n `a` Int64,\n `b` Float64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 After add statistic +CREATE TABLE default.t1\n(\n `a` Int64,\n `b` Float64,\n `pk` String,\n STATISTIC a TYPE tdigest,\n STATISTIC b TYPE tdigest\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 After materialize statistic SELECT count() FROM t1 diff --git a/tests/queries/0_stateless/02864_statistic_operate.sql b/tests/queries/0_stateless/02864_statistic_operate.sql index 6b74cc37e2a..c0c97e60dd2 100644 --- a/tests/queries/0_stateless/02864_statistic_operate.sql +++ b/tests/queries/0_stateless/02864_statistic_operate.sql @@ -25,13 +25,13 @@ SELECT 'After drop statistic'; EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE b < 10 and a < 10; SELECT count(*) FROM t1 WHERE b < 10 and a < 10; ---SHOW CREATE TABLE t1; +SHOW CREATE TABLE t1; ALTER TABLE t1 ADD STATISTIC a, b TYPE tdigest; SELECT 'After add statistic'; ---SHOW CREATE TABLE t1; +SHOW CREATE TABLE t1; ALTER TABLE t1 MATERIALIZE STATISTIC a, b TYPE tdigest; INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 2c29fd9369e..79a75665552 100644 
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -853,6 +853,7 @@ SystemReplicasThreadsActive TABLUM TCPConnection TCPThreads +TDigest TINYINT TKSV TLSv From 57b5f3ca7842520910b47953e4ea6f740a82ce94 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Mon, 11 Sep 2023 07:24:36 +0200 Subject: [PATCH 0037/1097] fix --- src/Storages/Statistic/Statistic.cpp | 2 +- src/Storages/Statistic/Statistic.h | 4 ++-- src/Storages/StatisticsDescription.cpp | 3 --- src/Storages/StatisticsDescription.h | 1 - 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/Storages/Statistic/Statistic.cpp b/src/Storages/Statistic/Statistic.cpp index 17aa6a76f1c..9b7ca3ab92d 100644 --- a/src/Storages/Statistic/Statistic.cpp +++ b/src/Storages/Statistic/Statistic.cpp @@ -165,7 +165,7 @@ StatisticPtr MergeTreeStatisticFactory::get(const StatisticDescription & stat) c return std::make_shared(stat); } -Statistics MergeTreeStatisticFactory::getMany(const std::vector & stats) const +Statistics MergeTreeStatisticFactory::getMany(const StatisticsDescriptions & stats) const { Statistics result; for (const auto & stat : stats) diff --git a/src/Storages/Statistic/Statistic.h b/src/Storages/Statistic/Statistic.h index eb05649f0a6..6653e073df4 100644 --- a/src/Storages/Statistic/Statistic.h +++ b/src/Storages/Statistic/Statistic.h @@ -58,7 +58,7 @@ public: protected: - const StatisticDescription & stat; + StatisticDescription stat; }; @@ -116,7 +116,7 @@ public: StatisticPtr get(const StatisticDescription & stat) const; - Statistics getMany(const std::vector & stats) const; + Statistics getMany(const StatisticsDescriptions & stats) const; void registerCreator(StatisticType type, Creator creator); diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index 097c2cd1ad5..3aee41f17e9 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -66,7 +66,6 @@ StatisticsDescriptions StatisticsDescriptions::getStatisticsFromAST(const ASTPtr stat.data_type = column.type; stats.push_back(stat); } - /// stats.definition_asts.push_back(definition_ast); if (stats.empty()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Empty statistic column list"); @@ -96,8 +95,6 @@ ASTPtr StatisticsDescriptions::getAST() const { auto list = std::make_shared(); - /// for (const auto & ast : definition_asts) - /// list.children.push_back(ast); for (const auto & stat : *this) { diff --git a/src/Storages/StatisticsDescription.h b/src/Storages/StatisticsDescription.h index 70a6c6bb34f..2949cac3245 100644 --- a/src/Storages/StatisticsDescription.h +++ b/src/Storages/StatisticsDescription.h @@ -30,7 +30,6 @@ struct StatisticDescription struct StatisticsDescriptions : public std::vector { - std::vector definition_asts; /// Stat with name exists bool has(const String & name) const; /// merge with other Statistics From 430a4fda9cf03a304bace0c7f50dcdfa6afed52a Mon Sep 17 00:00:00 2001 From: Han Fei Date: Tue, 12 Sep 2023 18:30:55 +0200 Subject: [PATCH 0038/1097] fix some tests --- src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp | 3 ++- src/Storages/Statistic/Statistic.cpp | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index f5f07bdb598..59902a6a115 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -274,7 +274,8 
@@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree cond.selectivity = estimator.estimateSelectivity(node); - LOG_DEBUG(log, "Condition {} has selectivity {}", node.getASTNode()->dumpTree(), cond.selectivity); + if (node.getASTNode() != nullptr) + LOG_DEBUG(log, "Condition {} has selectivity {}", node.getASTNode()->dumpTree(), cond.selectivity); } if (where_optimizer_context.move_primary_key_columns_to_end_of_prewhere) diff --git a/src/Storages/Statistic/Statistic.cpp b/src/Storages/Statistic/Statistic.cpp index 9b7ca3ab92d..7a909f599a9 100644 --- a/src/Storages/Statistic/Statistic.cpp +++ b/src/Storages/Statistic/Statistic.cpp @@ -104,11 +104,19 @@ Float64 ConditionEstimator::estimateSelectivity(const RPNBuilderTreeNode & node) return default_unknown_cond_factor; } auto it = column_estimators.find(col.value()); + + /// If there the estimator of the column is not found or there are no data at all, + /// we use dummy estimation. + bool dummy = total_count == 0; ColumnEstimator estimator; if (it != column_estimators.end()) { estimator = it->second; } + else + { + dummy = true; + } auto [op, val] = extractBinaryOp(node, col.value()); if (op == "equals") { @@ -119,10 +127,14 @@ Float64 ConditionEstimator::estimateSelectivity(const RPNBuilderTreeNode & node) } else if (op == "less" || op == "lessThan") { + if (dummy) + return default_normal_cond_factor; return estimator.estimateLess(val) / total_count; } else if (op == "greater" || op == "greaterThan") { + if (dummy) + return default_normal_cond_factor; return estimator.estimateGreater(val) / total_count; } else @@ -144,8 +156,6 @@ void MergeTreeStatisticFactory::registerCreator(StatisticType stat_type, Creator MergeTreeStatisticFactory::MergeTreeStatisticFactory() { registerCreator(TDigest, TDigestCreator); - - ///registerCreator("cm_sketch", CMSketchCreator); } MergeTreeStatisticFactory & MergeTreeStatisticFactory::instance() From 6970411c2e3f3d1e5899247cad39e3f7a64aee85 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 13 Sep 2023 08:26:25 +0200 Subject: [PATCH 0039/1097] fix wide part and fuzzer --- src/Storages/Statistic/Statistic.cpp | 2 ++ .../queries/0_stateless/02864_statistic_operate.reference | 7 ++++--- tests/queries/0_stateless/02864_statistic_operate.sql | 8 +++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/Storages/Statistic/Statistic.cpp b/src/Storages/Statistic/Statistic.cpp index 7a909f599a9..1bd593be5a5 100644 --- a/src/Storages/Statistic/Statistic.cpp +++ b/src/Storages/Statistic/Statistic.cpp @@ -93,6 +93,8 @@ std::pair ConditionEstimator::extractBinaryOp(const RPNBui value = output_value.get(); else if (type == Field::Types::Float64) value = output_value.get(); + else + return {}; return std::make_pair(function_name, value); } diff --git a/tests/queries/0_stateless/02864_statistic_operate.reference b/tests/queries/0_stateless/02864_statistic_operate.reference index 26be9b47b8e..67f9eb6de15 100644 --- a/tests/queries/0_stateless/02864_statistic_operate.reference +++ b/tests/queries/0_stateless/02864_statistic_operate.reference @@ -1,17 +1,18 @@ -CREATE TABLE default.t1\n(\n `a` Int64,\n `b` Float64,\n `pk` String,\n STATISTIC a TYPE tdigest,\n STATISTIC b TYPE tdigest\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 +CREATE TABLE default.t1\n(\n `a` Float64,\n `b` Int64,\n `pk` String,\n STATISTIC a TYPE tdigest,\n STATISTIC b TYPE tdigest\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 After 
insert SELECT count() FROM t1 PREWHERE (a < 10) AND (b < 10) 10 +0 After drop statistic SELECT count() FROM t1 PREWHERE (b < 10) AND (a < 10) 10 -CREATE TABLE default.t1\n(\n `a` Int64,\n `b` Float64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 +CREATE TABLE default.t1\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 After add statistic -CREATE TABLE default.t1\n(\n `a` Int64,\n `b` Float64,\n `pk` String,\n STATISTIC a TYPE tdigest,\n STATISTIC b TYPE tdigest\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 +CREATE TABLE default.t1\n(\n `a` Float64,\n `b` Int64,\n `pk` String,\n STATISTIC a TYPE tdigest,\n STATISTIC b TYPE tdigest\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 After materialize statistic SELECT count() FROM t1 diff --git a/tests/queries/0_stateless/02864_statistic_operate.sql b/tests/queries/0_stateless/02864_statistic_operate.sql index c0c97e60dd2..66fc4a9ec34 100644 --- a/tests/queries/0_stateless/02864_statistic_operate.sql +++ b/tests/queries/0_stateless/02864_statistic_operate.sql @@ -5,11 +5,12 @@ SET allow_statistic_optimize = 1; CREATE TABLE t1 ( - a Int64, - b Float64, + a Float64, + b Int64, pk String, STATISTIC a, b TYPE tdigest, -) Engine = MergeTree() ORDER BY pk; +) Engine = MergeTree() ORDER BY pk +SETTINGS min_bytes_for_wide_part = 0; SHOW CREATE TABLE t1; @@ -18,6 +19,7 @@ INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMI SELECT 'After insert'; EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE b < 10 and a < 10; SELECT count(*) FROM t1 WHERE b < 10 and a < 10; +SELECT count(*) FROM t1 WHERE b < NULL and a < '10'; ALTER TABLE t1 DROP STATISTIC a, b TYPE tdigest; From 83d1b44a3e71ab125ab7aa41cff3c0eac4c1391d Mon Sep 17 00:00:00 2001 From: Han Fei Date: Tue, 19 Sep 2023 17:08:15 +0200 Subject: [PATCH 0040/1097] validate --- src/Common/ErrorCodes.cpp | 1 + src/Interpreters/InterpreterAlterQuery.cpp | 5 +++ src/Interpreters/InterpreterCreateQuery.cpp | 6 +++ src/Storages/MergeTree/MergeTreeData.cpp | 11 +++++ src/Storages/Statistic/Statistic.cpp | 27 ++++++++++++ src/Storages/Statistic/Statistic.h | 15 ++++--- src/Storages/StatisticsDescription.cpp | 9 ++-- .../02864_statistic_exception.reference | 0 .../0_stateless/02864_statistic_exception.sql | 42 +++++++++++++++++++ 9 files changed, 108 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/02864_statistic_exception.reference create mode 100644 tests/queries/0_stateless/02864_statistic_exception.sql diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index f23685c37d1..4a808fd4c7f 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -585,6 +585,7 @@ M(700, USER_SESSION_LIMIT_EXCEEDED) \ M(701, CLUSTER_DOESNT_EXIST) \ M(702, CLIENT_INFO_DOES_NOT_MATCH) \ + M(703, ILLEGAL_STATISTIC) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index c0cde4566c1..6911b1c9cc8 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -151,6 +151,11 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) } else throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong parameter type in ALTER query"); + if (getContext()->getSettings().allow_experimental_statistic || + 
command_ast->type == ASTAlterCommand::ADD_STATISTIC || + command_ast->type == ASTAlterCommand::DROP_STATISTIC || + command_ast->type == ASTAlterCommand::MATERIALIZE_STATISTIC) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Alter table with statistic is now disabled. Turn on allow_experimental_statistic"); } if (typeid_cast(database.get())) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 349ac683db9..5dcac7e98b2 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -714,12 +714,18 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti properties.indices.push_back(index_desc); } + if (create.columns_list->stats) + { + const auto & settings = getContext()->getSettingsRef(); + if (!settings.allow_experimental_statistic) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Create table with statistic is now disabled. Turn on allow_experimental_statistic"); for (const auto & statistic : create.columns_list->stats->children) { auto stats = StatisticsDescriptions::getStatisticsFromAST(statistic->clone(), properties.columns, getContext()); properties.stats.merge(stats); } + } if (create.columns_list->projections) for (const auto & projection_ast : create.columns_list->projections->children) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c2512144fea..3de698cd81d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -651,6 +651,17 @@ void MergeTreeData::checkProperties( } } + if (!new_metadata.statistics.empty()) + { + for (const auto & stat : new_metadata.statistics) + { + auto column = all_columns.tryGetByName(stat.column_name); + if (!column.has_value()) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "column {} is not found", stat.column_name); + MergeTreeStatisticFactory::instance().validate(stat, column->type); + } + } + checkKeyExpression(*new_sorting_key.expression, new_sorting_key.sample_block, "Sorting", allow_nullable_key); } diff --git a/src/Storages/Statistic/Statistic.cpp b/src/Storages/Statistic/Statistic.cpp index 1bd593be5a5..b72a1780772 100644 --- a/src/Storages/Statistic/Statistic.cpp +++ b/src/Storages/Statistic/Statistic.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -13,6 +14,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int INCORRECT_QUERY; + extern const int ILLEGAL_STATISTIC; } @@ -149,15 +151,30 @@ StatisticPtr TDigestCreator(const StatisticDescription & stat) return StatisticPtr(new TDigestStatistic(stat)); } +void TDigestValidator(const StatisticDescription &, DataTypePtr data_type) +{ + data_type = removeNullable(data_type); + if (!data_type->isValueRepresentedByNumber()) + throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "TDigest does not support type {}", data_type->getName()); +} + void MergeTreeStatisticFactory::registerCreator(StatisticType stat_type, Creator creator) { if (!creators.emplace(stat_type, std::move(creator)).second) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticFactory: the statistic creator type {} is not unique", stat_type); } +void MergeTreeStatisticFactory::registerValidator(StatisticType stat_type, Validator validator) +{ + if (!validators.emplace(stat_type, std::move(validator)).second) + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticFactory: the statistic validator type {} is not unique", stat_type); + +} + 
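/// Illustrative sketch only, in the spirit of the registry being added above: how a
/// hypothetical second statistic type would plug into the creator/validator pair.
/// The "CountMin" names are assumptions for the example; this series only registers TDigest.
///
///     StatisticPtr CountMinCreator(const StatisticDescription & stat)
///     {
///         return StatisticPtr(new CountMinStatistic(stat));   /// hypothetical statistic class
///     }
///
///     void CountMinValidator(const StatisticDescription &, DataTypePtr data_type)
///     {
///         data_type = removeNullable(data_type);
///         if (!data_type->isValueRepresentedByNumber())        /// same numeric-type check TDigest uses
///             throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "CountMin does not support type {}", data_type->getName());
///     }
///
///     /// and, next to the TDigest lines in the factory constructor:
///     ///     registerCreator(CountMin, CountMinCreator);
///     ///     registerValidator(CountMin, CountMinValidator);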
MergeTreeStatisticFactory::MergeTreeStatisticFactory() { registerCreator(TDigest, TDigestCreator); + registerValidator(TDigest, TDigestValidator); } MergeTreeStatisticFactory & MergeTreeStatisticFactory::instance() @@ -166,6 +183,16 @@ MergeTreeStatisticFactory & MergeTreeStatisticFactory::instance() return instance; } +void MergeTreeStatisticFactory::validate(const StatisticDescription & stat, DataTypePtr data_type) const +{ + auto it = validators.find(stat.type); + if (it == validators.end()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown Statistic type '{}'", stat.type); + } + it->second(stat, data_type); +} + StatisticPtr MergeTreeStatisticFactory::get(const StatisticDescription & stat) const { auto it = creators.find(stat.type); diff --git a/src/Storages/Statistic/Statistic.h b/src/Storages/Statistic/Statistic.h index 6653e073df4..ec912dcc39c 100644 --- a/src/Storages/Statistic/Statistic.h +++ b/src/Storages/Statistic/Statistic.h @@ -23,6 +23,9 @@ class IStatistic; using StatisticPtr = std::shared_ptr; using Statistics = std::vector; +/// Statistic for a column +/// right now we support +/// - tdigest class IStatistic { public: @@ -43,11 +46,6 @@ public: return stat.column_name; } - //const String & type() const - //{ - // return stat.type; - //} - virtual void serialize(WriteBuffer & buf) = 0; virtual void deserialize(ReadBuffer & buf) = 0; @@ -112,20 +110,27 @@ class MergeTreeStatisticFactory : private boost::noncopyable public: static MergeTreeStatisticFactory & instance(); + void validate(const StatisticDescription & stat, DataTypePtr data_type) const; + using Creator = std::function; + using Validator = std::function; + StatisticPtr get(const StatisticDescription & stat) const; Statistics getMany(const StatisticsDescriptions & stats) const; void registerCreator(StatisticType type, Creator creator); + void registerValidator(StatisticType type, Validator validator); protected: MergeTreeStatisticFactory(); private: using Creators = std::unordered_map; + using Validators = std::unordered_map; Creators creators; + Validators validators; }; class RPNBuilderTreeNode; diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index 3aee41f17e9..5d9ea6d670b 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -18,13 +18,12 @@ namespace DB namespace ErrorCodes { extern const int INCORRECT_QUERY; + extern const int ILLEGAL_STATISTIC; extern const int LOGICAL_ERROR; }; StatisticType StatisticDescription::stringToType(String type) { - if (type.empty()) - return TDigest; if (type == "tdigest") return TDigest; throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type: {}", type); @@ -87,8 +86,12 @@ bool StatisticsDescriptions::has(const String & name) const void StatisticsDescriptions::merge(const StatisticsDescriptions & other) { + /// Check duplicate + for (const auto & old_stat : * this) + for (const auto & new_stat : other) + if (old_stat.column_name == new_stat.column_name) + throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "Statistic column {} has existed", old_stat.column_name); insert(end(), other.begin(), other.end()); - /// definition_asts.insert(definition_asts.end(), other.definition_asts.begin(), other.definition_asts.end()); } ASTPtr StatisticsDescriptions::getAST() const diff --git a/tests/queries/0_stateless/02864_statistic_exception.reference b/tests/queries/0_stateless/02864_statistic_exception.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/tests/queries/0_stateless/02864_statistic_exception.sql b/tests/queries/0_stateless/02864_statistic_exception.sql new file mode 100644 index 00000000000..58b43768679 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistic_exception.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS t1; + +CREATE TABLE t1 +( + a Float64, + b Int64, + pk String, + STATISTIC a, b TYPE tdigest, +) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + +SET allow_experimental_statistic = 1; + +CREATE TABLE t1 +( + a Float64, + b Int64, + pk String, + STATISTIC a, a TYPE tdigest, +) Engine = MergeTree() ORDER BY pk; -- { serverError ILLEGAL_STATISTIC } + +CREATE TABLE t1 +( + a Float64, + b Int64, + pk String, + STATISTIC a, pk TYPE tdigest, +) Engine = MergeTree() ORDER BY pk; -- { serverError ILLEGAL_STATISTIC } + +CREATE TABLE t1 +( + a Float64, + b Int64, + pk String, +) Engine = MergeTree() ORDER BY pk; + +ALTER TABLE t1 ADD STATISTIC a TYPE tdigest; +ALTER TABLE t1 ADD STATISTIC a TYPE tdigest; -- { serverError INCORRECT_QUERY } +ALTER TABLE t1 ADD STATISTIC pk tdigest; -- { serverError ILLEGAL_STATISTIC } +ALTER TABLE t1 DROP STATISTIC b TYPE tdigest; -- { serverError INCORRECT_QUERY } +ALTER TABLE t1 MATERIALIZE STATISTIC b TYPE tdigest; -- { serverError INCORRECT_QUERY } + +DROP TABLE t1; From 7883ae0d8cf60657a7a225f5d00bbe3655ca8a70 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Fri, 22 Sep 2023 13:22:35 +0800 Subject: [PATCH 0041/1097] Revert "Revert "Avoid excessive calls to getifaddrs in isLocalAddress"" --- src/Common/isLocalAddress.cpp | 43 ++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/src/Common/isLocalAddress.cpp b/src/Common/isLocalAddress.cpp index 7569c6fc14e..902505404a6 100644 --- a/src/Common/isLocalAddress.cpp +++ b/src/Common/isLocalAddress.cpp @@ -1,9 +1,14 @@ #include #include +#include #include +#include +#include +#include #include #include +#include #include #include #include @@ -20,7 +25,7 @@ namespace ErrorCodes namespace { -struct NetworkInterfaces +struct NetworkInterfaces : public boost::noncopyable { ifaddrs * ifaddr; NetworkInterfaces() @@ -31,6 +36,13 @@ struct NetworkInterfaces } } + void swap(NetworkInterfaces && other) + { + auto * tmp = ifaddr; + ifaddr = other.ifaddr; + other.ifaddr = tmp; + } + bool hasAddress(const Poco::Net::IPAddress & address) const { ifaddrs * iface; @@ -74,6 +86,32 @@ struct NetworkInterfaces { freeifaddrs(ifaddr); } + + static const NetworkInterfaces & instance() + { + static constexpr int NET_INTERFACE_VALID_PERIOD_MS = 30000; + static NetworkInterfaces nf; + static std::atomic last_updated_time = std::chrono::steady_clock::now(); + static std::shared_mutex nf_mtx; + + auto now = std::chrono::steady_clock::now(); + auto last_updated_time_snapshot = last_updated_time.load(); + + if (std::chrono::duration_cast(now - last_updated_time_snapshot).count() > NET_INTERFACE_VALID_PERIOD_MS) + { + std::unique_lock lock(nf_mtx); + if (last_updated_time.load() != last_updated_time_snapshot) /// it's possible that last_updated_time after we get the snapshot + return nf; + nf.swap(NetworkInterfaces()); + last_updated_time.store(now); + return nf; + } + else + { + std::shared_lock lock(nf_mtx); + return nf; + } + } }; } @@ -111,8 +149,7 @@ bool isLocalAddress(const Poco::Net::IPAddress & address) } } - NetworkInterfaces interfaces; - return interfaces.hasAddress(address); + return NetworkInterfaces::instance().hasAddress(address); } From 3b48b5aa5e7706d27bb3692d27dee7898e5111c9 Mon Sep 17 
00:00:00 2001 From: Duc Canh Le Date: Fri, 22 Sep 2023 04:57:29 +0000 Subject: [PATCH 0042/1097] extend lifetime of returned network interface object Signed-off-by: Duc Canh Le --- src/Common/isLocalAddress.cpp | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/src/Common/isLocalAddress.cpp b/src/Common/isLocalAddress.cpp index 902505404a6..772e0363904 100644 --- a/src/Common/isLocalAddress.cpp +++ b/src/Common/isLocalAddress.cpp @@ -36,13 +36,6 @@ struct NetworkInterfaces : public boost::noncopyable } } - void swap(NetworkInterfaces && other) - { - auto * tmp = ifaddr; - ifaddr = other.ifaddr; - other.ifaddr = tmp; - } - bool hasAddress(const Poco::Net::IPAddress & address) const { ifaddrs * iface; @@ -87,23 +80,24 @@ struct NetworkInterfaces : public boost::noncopyable freeifaddrs(ifaddr); } - static const NetworkInterfaces & instance() + static std::shared_ptr instance() { static constexpr int NET_INTERFACE_VALID_PERIOD_MS = 30000; - static NetworkInterfaces nf; + static std::shared_ptr nf = std::make_shared(); static std::atomic last_updated_time = std::chrono::steady_clock::now(); static std::shared_mutex nf_mtx; auto now = std::chrono::steady_clock::now(); - auto last_updated_time_snapshot = last_updated_time.load(); - if (std::chrono::duration_cast(now - last_updated_time_snapshot).count() > NET_INTERFACE_VALID_PERIOD_MS) + if (std::chrono::duration_cast(now - last_updated_time.load()).count() > NET_INTERFACE_VALID_PERIOD_MS) { std::unique_lock lock(nf_mtx); - if (last_updated_time.load() != last_updated_time_snapshot) /// it's possible that last_updated_time after we get the snapshot - return nf; - nf.swap(NetworkInterfaces()); - last_updated_time.store(now); + /// It's possible that last_updated_time after we get lock + if (std::chrono::duration_cast(now - last_updated_time.load()).count() > NET_INTERFACE_VALID_PERIOD_MS) + { + nf = std::make_shared(); + last_updated_time.store(now); + } return nf; } else @@ -149,7 +143,7 @@ bool isLocalAddress(const Poco::Net::IPAddress & address) } } - return NetworkInterfaces::instance().hasAddress(address); + return NetworkInterfaces::instance()->hasAddress(address); } From 3b0579b33946835dc09d058d568271f66f54b822 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 22 Sep 2023 14:44:40 +0000 Subject: [PATCH 0043/1097] Fix splitting into buckets --- src/Processors/Transforms/ScatterByPartitionTransform.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Processors/Transforms/ScatterByPartitionTransform.cpp b/src/Processors/Transforms/ScatterByPartitionTransform.cpp index 336371dae8b..6e3cdc0fda1 100644 --- a/src/Processors/Transforms/ScatterByPartitionTransform.cpp +++ b/src/Processors/Transforms/ScatterByPartitionTransform.cpp @@ -115,11 +115,7 @@ void ScatterByPartitionTransform::generateOutputChunks() IColumn::Selector selector(num_rows); for (size_t row = 0; row < num_rows; ++row) - { - selector[row] = hash_data[row]; /// [0, 2^32) - selector[row] *= output_size; /// [0, output_size * 2^32), selector stores 64 bit values. 
- selector[row] >>= 32u; /// [0, output_size) - } + selector[row] = hash_data[row] % output_size; output_chunks.resize(output_size); for (const auto & column : columns) From c1d84605016564eff3f8e3dc39a97df0f9cecea2 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 22 Sep 2023 15:10:40 +0000 Subject: [PATCH 0044/1097] Fix queries in the test --- .../0_stateless/01568_window_functions_distributed.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01568_window_functions_distributed.sql b/tests/queries/0_stateless/01568_window_functions_distributed.sql index 140b02bfa6d..ef8e9541b05 100644 --- a/tests/queries/0_stateless/01568_window_functions_distributed.sql +++ b/tests/queries/0_stateless/01568_window_functions_distributed.sql @@ -13,11 +13,11 @@ create table t_01568 engine Memory as select intDiv(number, 3) p, modulo(number, 3) o, number from numbers(9); -select sum(number) over w, max(number) over w from t_01568 window w as (partition by p); +select sum(number) over w, max(number) over w from t_01568 window w as (partition by p) order by p; -select sum(number) over w, max(number) over w from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p); +select sum(number) over w, max(number) over w from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p) order by p; -select distinct sum(number) over w, max(number) over w from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p); +select distinct sum(number) over w, max(number) over w from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p) order by p; -- window functions + aggregation w/shards select groupArray(groupArray(number)) over (rows unbounded preceding) from remote('127.0.0.{1,2}', '', t_01568) group by mod(number, 3); From b7cfc4d82d8949a174dc3c08390f65d0531c748a Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 25 Sep 2023 15:34:00 +0000 Subject: [PATCH 0045/1097] Support in Planner --- src/Planner/Planner.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 9dab68e3f00..3c2f01f9881 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -813,6 +813,7 @@ void addWindowSteps(QueryPlan & query_plan, auto sorting_step = std::make_unique( query_plan.getCurrentDataStream(), window_description.full_sort_description, + window_description.partition_by, 0 /*limit*/, sort_settings, settings.optimize_sorting_by_input_stream_properties); From d4acd9ec61da01e8c5dad97196c6a35e5c794475 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 25 Sep 2023 15:34:35 +0000 Subject: [PATCH 0046/1097] Set correct stream sorting --- src/Processors/QueryPlan/SortingStep.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 814d818d227..ec32f6f6a28 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -89,6 +89,9 @@ SortingStep::SortingStep( : SortingStep(input_stream, description_, limit_, settings_, optimize_sorting_by_input_stream_properties_) { partition_by_description = partition_by_description_; + + output_stream->sort_description = result_description; + output_stream->sort_scope = DataStream::SortScope::Stream; } SortingStep::SortingStep( @@ -132,7 +135,11 @@ void SortingStep::updateOutputStream() { output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits()); 
output_stream->sort_description = result_description; - output_stream->sort_scope = DataStream::SortScope::Global; + + if (partition_by_description.empty()) + output_stream->sort_scope = DataStream::SortScope::Global; + else + output_stream->sort_scope = DataStream::SortScope::Stream; } void SortingStep::updateLimit(size_t limit_) From 32dd65cb22f424e352ee8d41d13577b4d7e5bb6c Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 25 Sep 2023 15:35:02 +0000 Subject: [PATCH 0047/1097] Update reference file --- .../01568_window_functions_distributed.reference | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01568_window_functions_distributed.reference b/tests/queries/0_stateless/01568_window_functions_distributed.reference index 0b439ef759a..1284624fad1 100644 --- a/tests/queries/0_stateless/01568_window_functions_distributed.reference +++ b/tests/queries/0_stateless/01568_window_functions_distributed.reference @@ -12,7 +12,7 @@ drop table if exists t_01568; create table t_01568 engine Memory as select intDiv(number, 3) p, modulo(number, 3) o, number from numbers(9); -select sum(number) over w, max(number) over w from t_01568 window w as (partition by p); +select sum(number) over w, max(number) over w from t_01568 window w as (partition by p) order by p; 3 2 3 2 3 2 @@ -22,7 +22,7 @@ select sum(number) over w, max(number) over w from t_01568 window w as (partitio 21 8 21 8 21 8 -select sum(number) over w, max(number) over w from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p); +select sum(number) over w, max(number) over w from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p) order by p; 6 2 6 2 6 2 @@ -41,7 +41,7 @@ select sum(number) over w, max(number) over w from remote('127.0.0.{1,2}', '', t 42 8 42 8 42 8 -select distinct sum(number) over w, max(number) over w from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p); +select distinct sum(number) over w, max(number) over w from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p) order by p; 6 2 24 5 42 8 From 8cb9fe0154ddeaa206b6dd593e67b0c90fe1db75 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Tue, 26 Sep 2023 19:16:01 +0200 Subject: [PATCH 0048/1097] refine code --- src/Databases/DatabasesCommon.cpp | 2 - src/Interpreters/InterpreterAlterQuery.cpp | 4 +- src/Interpreters/InterpreterCreateQuery.cpp | 32 +++---- src/Interpreters/InterpreterCreateQuery.h | 2 - src/Interpreters/MutationsInterpreter.cpp | 15 +--- src/Parsers/ASTAlterQuery.cpp | 2 +- src/Parsers/ASTColumnDeclaration.cpp | 12 +++ src/Parsers/ASTColumnDeclaration.h | 1 + src/Parsers/ASTCreateQuery.cpp | 12 --- src/Parsers/ASTCreateQuery.h | 3 +- src/Parsers/ExpressionElementParsers.cpp | 27 ++++++ src/Parsers/ExpressionElementParsers.h | 8 ++ src/Parsers/ParserCreateQuery.cpp | 12 --- src/Parsers/ParserCreateQuery.h | 17 +++- src/Storages/AlterCommands.cpp | 52 +++++------ src/Storages/ColumnsDescription.cpp | 6 ++ src/Storages/ColumnsDescription.h | 2 + src/Storages/IStorage.cpp | 6 ++ src/Storages/IStorage.h | 5 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- src/Storages/MergeTree/MergeTask.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 11 +-- .../MergeTree/MergeTreeDataPartInMemory.cpp | 2 +- .../MergeTree/MergeTreeDataWriter.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 54 +++++++---- .../MergeTree/registerStorageMergeTree.cpp | 12 +-- src/Storages/MutationCommands.cpp | 18 ++-- src/Storages/Statistic/Statistic.cpp | 8 +- 
src/Storages/Statistic/Statistic.h | 6 +- src/Storages/StatisticsDescription.cpp | 89 ++++++------------- src/Storages/StatisticsDescription.h | 25 ++---- src/Storages/StorageInMemoryMetadata.cpp | 13 --- src/Storages/StorageInMemoryMetadata.h | 8 -- .../test_manipulate_statistic/__init__.py | 0 .../config/config.xml | 7 ++ .../test_manipulate_statistic/test.py | 78 ++++++++++++++++ .../0_stateless/02864_statistic_exception.sql | 27 +++--- .../02864_statistic_operate.reference | 4 +- .../0_stateless/02864_statistic_operate.sql | 5 +- 39 files changed, 334 insertions(+), 259 deletions(-) create mode 100644 tests/integration/test_manipulate_statistic/__init__.py create mode 100644 tests/integration/test_manipulate_statistic/config/config.xml create mode 100644 tests/integration/test_manipulate_statistic/test.py diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 86419fcb207..4ba793d858d 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -46,13 +46,11 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo { ASTPtr new_columns = InterpreterCreateQuery::formatColumns(metadata.columns); ASTPtr new_indices = InterpreterCreateQuery::formatIndices(metadata.secondary_indices); - ASTPtr new_statistics = InterpreterCreateQuery::formatStatistics(metadata.statistics); ASTPtr new_constraints = InterpreterCreateQuery::formatConstraints(metadata.constraints); ASTPtr new_projections = InterpreterCreateQuery::formatProjections(metadata.projections); ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns); ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->indices, new_indices); - ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->stats, new_statistics); ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->constraints, new_constraints); ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->projections, new_projections); } diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 8053aa1af53..8adfcc5421c 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -151,10 +151,10 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) } else throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong parameter type in ALTER query"); - if (getContext()->getSettings().allow_experimental_statistic || + if (!getContext()->getSettings().allow_experimental_statistic && ( command_ast->type == ASTAlterCommand::ADD_STATISTIC || command_ast->type == ASTAlterCommand::DROP_STATISTIC || - command_ast->type == ASTAlterCommand::MATERIALIZE_STATISTIC) + command_ast->type == ASTAlterCommand::MATERIALIZE_STATISTIC)) throw Exception(ErrorCodes::INCORRECT_QUERY, "Alter table with statistic is now disabled. 
Turn on allow_experimental_statistic"); } diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index b8c29b81c53..c9aff97cfe2 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -435,6 +435,12 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns) column_declaration->children.push_back(column_declaration->codec); } + if (column.stat) + { + column_declaration->stat_type = column.stat->ast; + column_declaration->children.push_back(column_declaration->stat_type); + } + if (column.ttl) { column_declaration->ttl = column.ttl; @@ -457,11 +463,6 @@ ASTPtr InterpreterCreateQuery::formatIndices(const IndicesDescription & indices) return res; } -ASTPtr InterpreterCreateQuery::formatStatistics(const StatisticsDescriptions & statistics) -{ - return statistics.getAST(); -} - ASTPtr InterpreterCreateQuery::formatConstraints(const ConstraintsDescription & constraints) { auto res = std::make_shared(); @@ -642,6 +643,13 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec); } + if (col_decl.stat_type) + { + if (!context_->getSettingsRef().allow_experimental_statistic) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Create table with statistic is now disabled. Turn on allow_experimental_statistic"); + column.stat = StatisticDescription::getStatisticFromColumnDeclaration(col_decl); + } + if (col_decl.ttl) column.ttl = col_decl.ttl; @@ -717,18 +725,6 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti properties.indices.push_back(index_desc); } - if (create.columns_list->stats) - { - const auto & settings = getContext()->getSettingsRef(); - if (!settings.allow_experimental_statistic) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Create table with statistic is now disabled. 
Turn on allow_experimental_statistic"); - for (const auto & statistic : create.columns_list->stats->children) - { - auto stats = StatisticsDescriptions::getStatisticsFromAST(statistic->clone(), properties.columns, getContext()); - properties.stats.merge(stats); - } - } - if (create.columns_list->projections) for (const auto & projection_ast : create.columns_list->projections->children) { @@ -814,13 +810,11 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti ASTPtr new_columns = formatColumns(properties.columns); ASTPtr new_indices = formatIndices(properties.indices); - ASTPtr new_statistics = formatStatistics(properties.stats); ASTPtr new_constraints = formatConstraints(properties.constraints); ASTPtr new_projections = formatProjections(properties.projections); create.columns_list->setOrReplace(create.columns_list->columns, new_columns); create.columns_list->setOrReplace(create.columns_list->indices, new_indices); - create.columns_list->setOrReplace(create.columns_list->stats, new_statistics); create.columns_list->setOrReplace(create.columns_list->constraints, new_constraints); create.columns_list->setOrReplace(create.columns_list->projections, new_projections); diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 66ac0ebf89c..67339dea928 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -38,7 +38,6 @@ public: static ASTPtr formatColumns(const NamesAndTypesList & columns, const NamesAndAliases & alias_columns); static ASTPtr formatColumns(const ColumnsDescription & columns); static ASTPtr formatIndices(const IndicesDescription & indices); - static ASTPtr formatStatistics(const StatisticsDescriptions & statistics); static ASTPtr formatConstraints(const ConstraintsDescription & constraints); static ASTPtr formatProjections(const ProjectionsDescription & projections); @@ -81,7 +80,6 @@ private: { ColumnsDescription columns; IndicesDescription indices; - StatisticsDescriptions stats; ConstraintsDescription constraints; ProjectionsDescription projections; }; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 961a4f7ac72..87924aa3825 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -54,6 +54,7 @@ namespace ErrorCodes extern const int CANNOT_UPDATE_COLUMN; extern const int UNEXPECTED_EXPRESSION; extern const int THERE_IS_NO_COLUMN; + extern const int ILLEGAL_STATISTIC; } namespace @@ -485,7 +486,6 @@ void MutationsInterpreter::prepare(bool dry_run) /// TODO Should we get columns, indices and projections from the part itself? 
Table metadata may be different const ColumnsDescription & columns_desc = metadata_snapshot->getColumns(); const IndicesDescription & indices_desc = metadata_snapshot->getSecondaryIndices(); - const StatisticsDescriptions & statistics_desc = metadata_snapshot->getStatistics(); const ProjectionsDescription & projections_desc = metadata_snapshot->getProjections(); auto storage_snapshot = std::make_shared(*source.getStorage(), metadata_snapshot); @@ -726,16 +726,9 @@ void MutationsInterpreter::prepare(bool dry_run) mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); for (const auto & stat_column_name: command.statistic_columns) { - auto it = std::find_if( - std::cbegin(statistics_desc), std::end(statistics_desc), - [&](const StatisticDescription & statistic) - { - return statistic.column_name == stat_column_name; - }); - if (it == std::cend(statistics_desc)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown statistic column: {}", stat_column_name); - - dependencies.emplace(it->column_name, ColumnDependency::STATISTIC); + if (!columns_desc.has(stat_column_name) || !columns_desc.get(stat_column_name).stat) + throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "Unknown statistic column: {}", stat_column_name); + dependencies.emplace(stat_column_name, ColumnDependency::STATISTIC); materialized_statistics.emplace(stat_column_name); } } diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 2db7bb93e8b..57f82c05099 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -209,7 +209,7 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & } else if (type == ASTAlterCommand::DROP_STATISTIC) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << (clear_statistic ? "CLEAR " : "DROP ") << "INDEX " + settings.ostr << (settings.hilite ? hilite_keyword : "") << (clear_statistic ? "CLEAR " : "DROP ") << "STATISTIC " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); statistic_decl->formatImpl(settings, state, frame); if (partition) diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index e9b490a1be3..b0d812eec6b 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -39,6 +39,12 @@ ASTPtr ASTColumnDeclaration::clone() const res->children.push_back(res->codec); } + if (stat_type) + { + res->stat_type = stat_type->clone(); + res->children.push_back(res->stat_type); + } + if (ttl) { res->ttl = ttl->clone(); @@ -99,6 +105,12 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta codec->formatImpl(settings, state, frame); } + if (stat_type) + { + settings.ostr << ' '; + stat_type->formatImpl(settings, state, frame); + } + if (ttl) { settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "TTL" << (settings.hilite ? 
hilite_none : "") << ' '; diff --git a/src/Parsers/ASTColumnDeclaration.h b/src/Parsers/ASTColumnDeclaration.h index 9d486667911..a54abae97ea 100644 --- a/src/Parsers/ASTColumnDeclaration.h +++ b/src/Parsers/ASTColumnDeclaration.h @@ -19,6 +19,7 @@ public: bool ephemeral_default = false; ASTPtr comment; ASTPtr codec; + ASTPtr stat_type; ASTPtr ttl; ASTPtr collation; bool primary_key_specifier = false; diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index d2bab2366aa..1562586bd93 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -134,8 +134,6 @@ ASTPtr ASTColumns::clone() const res->set(res->columns, columns->clone()); if (indices) res->set(res->indices, indices->clone()); - if (stats) - res->set(res->stats, stats->clone()); if (constraints) res->set(res->constraints, constraints->clone()); if (projections) @@ -170,16 +168,6 @@ void ASTColumns::formatImpl(const FormatSettings & s, FormatState & state, Forma list.children.push_back(elem); } } - if (stats) - { - for (const auto & stat : stats->children) - { - auto elem = std::make_shared(); - elem->prefix = "STATISTIC"; - elem->set(elem->elem, stat->clone()); - list.children.push_back(elem); - } - } if (constraints) { for (const auto & constraint : constraints->children) diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 0ef3975335b..28f5e05802b 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -53,7 +53,6 @@ class ASTColumns : public IAST public: ASTExpressionList * columns = nullptr; ASTExpressionList * indices = nullptr; - ASTExpressionList * stats = nullptr; ASTExpressionList * constraints = nullptr; ASTExpressionList * projections = nullptr; IAST * primary_key = nullptr; @@ -68,7 +67,7 @@ public: bool empty() const { return (!columns || columns->children.empty()) && (!indices || indices->children.empty()) && (!constraints || constraints->children.empty()) - && (!projections || projections->children.empty()) && (!stats || stats->children.empty()); + && (!projections || projections->children.empty()); } void forEachPointerToChild(std::function f) override diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index f25e7f3c7e2..c85ceec4120 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -685,6 +685,33 @@ bool ParserCodec::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } +bool ParserStatisticType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserList stat_type_parser(std::make_unique(), + std::make_unique(TokenType::Comma), false); + + if (pos->type != TokenType::OpeningRoundBracket) + return false; + ASTPtr stat_type; + + ++pos; + + if (!stat_type_parser.parse(pos, stat_type, expected)) + return false; + + if (pos->type != TokenType::ClosingRoundBracket) + return false; + ++pos; + + auto function_node = std::make_shared(); + function_node->name = "STATISTIC"; + function_node->arguments = stat_type; + function_node->children.push_back(function_node->arguments); + + node = function_node; + return true; +} + bool ParserCollation::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr collation; diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index a7980c08671..2104a71cd0d 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -197,6 +197,14 @@ protected: bool parseImpl(Pos & pos, ASTPtr 
& node, Expected & expected) override; }; +/// STATISTIC(tdigest(200)) +class ParserStatisticType : public IParserBase +{ +protected: + const char * getName() const override { return "statistic"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + /** Parse collation * COLLATE utf8_unicode_ci NOT NULL */ diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index f9b4d90e5a8..a674cb30f0a 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -307,14 +307,12 @@ bool ParserForeignKeyDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & bool ParserTablePropertyDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_index("INDEX"); - ParserKeyword s_stat("STATISTIC"); ParserKeyword s_constraint("CONSTRAINT"); ParserKeyword s_projection("PROJECTION"); ParserKeyword s_foreign_key("FOREIGN KEY"); ParserKeyword s_primary_key("PRIMARY KEY"); ParserIndexDeclaration index_p; - ParserStatisticDeclaration stat_p; ParserConstraintDeclaration constraint_p; ParserProjectionDeclaration projection_p; ParserForeignKeyDeclaration foreign_key_p; @@ -333,11 +331,6 @@ bool ParserTablePropertyDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expecte if (!constraint_p.parse(pos, new_node, expected)) return false; } - else if (s_stat.ignore(pos, expected)) - { - if (!stat_p.parse(pos, new_node, expected)) - return false; - } else if (s_projection.ignore(pos, expected)) { if (!projection_p.parse(pos, new_node, expected)) @@ -392,7 +385,6 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr columns = std::make_shared(); ASTPtr indices = std::make_shared(); - ASTPtr stats = std::make_shared(); ASTPtr constraints = std::make_shared(); ASTPtr projections = std::make_shared(); ASTPtr primary_key; @@ -413,8 +405,6 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E } else if (elem->as()) indices->children.push_back(elem); - else if (elem->as()) - stats->children.push_back(elem); else if (elem->as()) constraints->children.push_back(elem); else if (elem->as()) @@ -443,8 +433,6 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E res->set(res->columns, columns); if (!indices->children.empty()) res->set(res->indices, indices); - if (!stats->children.empty()) - res->set(res->stats, stats); if (!constraints->children.empty()) res->set(res->constraints, constraints); if (!projections->children.empty()) diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 33d7a95c9ca..910ee048442 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -131,6 +131,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserKeyword s_auto_increment{"AUTO_INCREMENT"}; ParserKeyword s_comment{"COMMENT"}; ParserKeyword s_codec{"CODEC"}; + ParserKeyword s_stat{"STATISTIC"}; ParserKeyword s_ttl{"TTL"}; ParserKeyword s_remove{"REMOVE"}; ParserKeyword s_type{"TYPE"}; @@ -141,6 +142,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserLiteral literal_parser; ParserCodec codec_parser; ParserCollation collation_parser; + ParserStatisticType stat_type_parser; ParserExpression expression_parser; /// mandatory column name @@ -176,6 +178,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr default_expression; ASTPtr comment_expression; ASTPtr codec_expression; + ASTPtr stat_type_expression; ASTPtr 
ttl_expression; ASTPtr collation_expression; bool primary_key_specifier = false; @@ -301,6 +304,12 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E return false; } + if (s_stat.ignore(pos, expected)) + { + if (!stat_type_parser.parse(pos, stat_type_expression, expected)) + return false; + } + if (s_ttl.ignore(pos, expected)) { if (!expression_parser.parse(pos, ttl_expression, expected)) @@ -342,6 +351,12 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E column_declaration->children.push_back(std::move(codec_expression)); } + if (stat_type_expression) + { + column_declaration->stat_type = stat_type_expression; + column_declaration->children.push_back(std::move(stat_type_expression)); + } + if (ttl_expression) { column_declaration->ttl = ttl_expression; @@ -389,8 +404,6 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -/** name BY columns TYPE typename(arg1, arg2, ...) */ -/** name BY columns */ class ParserStatisticDeclaration : public IParserBase { public: diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 84d0ff74c6d..e18999e1e71 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -38,6 +38,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_STATISTIC; extern const int BAD_ARGUMENTS; extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int LOGICAL_ERROR; @@ -588,46 +589,37 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) } else if (type == ADD_STATISTIC) { - /// TODO: Right now we assume there is only one type of statistics for simple implement. for (const auto & statistic_column_name : statistic_columns) { - if (!if_not_exists && std::any_of( - metadata.statistics.cbegin(), - metadata.statistics.cend(), - [&](const auto & statistic) - { - return statistic.column_name == statistic_column_name; - })) + if (!metadata.columns.has(statistic_column_name)) { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add statistic {} with type {}: statistic on this column with this type already exists", statistic_column_name, statistic_type); + throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "Cannot add statistic {} with type {}: this column is not found", statistic_column_name, statistic_type); } + if (metadata.columns.get(statistic_column_name).stat) + throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "Cannot add statistic {} with type {}: statistic on this column with this type already exists", statistic_column_name, statistic_type); } - auto stats = StatisticsDescriptions::getStatisticsFromAST(statistic_decl, metadata.columns, context); - metadata.statistics.merge(stats); + auto stats = StatisticDescription::getStatisticsFromAST(statistic_decl, metadata.columns); + for (auto && stat : stats) + { + metadata.columns.modify(stat.column_name, + [&](ColumnDescription & column) { column.stat = std::move(stat); }); + } } else if (type == DROP_STATISTIC) { - if (!partition && !clear) + for (const auto & stat_column_name : statistic_columns) { - for (const auto & stat_column_name : statistic_columns) + if (!metadata.columns.has(stat_column_name) || !metadata.columns.get(stat_column_name).stat) { - auto erase_it = std::find_if( - metadata.statistics.begin(), - metadata.statistics.end(), - [stat_column_name](const auto & statistic) - { - return statistic.column_name == stat_column_name; - }); - - if (erase_it == metadata.statistics.end()) - { - if (if_exists) - return; - throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong statistic name. Cannot find statistic {} with type {} to drop", backQuote(stat_column_name), statistic_type); - } - LOG_INFO(&Poco::Logger::get("drop_stat"), "dropping statistic {}", erase_it->column_name); - metadata.statistics.erase(erase_it); + if (if_exists) + return; + throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "Wrong statistic name. Cannot find statistic {} with type {} to drop", backQuote(stat_column_name), statistic_type); + } + if (!partition && !clear) + { + metadata.columns.modify(stat_column_name, + [&](ColumnDescription & column) { column.stat = std::nullopt; }); } } } @@ -854,7 +846,7 @@ bool AlterCommand::isRequireMutationStage(const StorageInMemoryMetadata & metada if (isRemovingProperty() || type == REMOVE_TTL || type == REMOVE_SAMPLE_BY) return false; - if (type == DROP_INDEX || type == DROP_PROJECTION || type == RENAME_COLUMN) + if (type == DROP_INDEX || type == DROP_PROJECTION || type == RENAME_COLUMN || type == DROP_STATISTIC) return true; /// Drop alias is metadata alter, in other case mutation is required. diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 0c918bda5fd..500e7ae6d82 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -90,6 +90,12 @@ void ColumnDescription::writeText(WriteBuffer & buf) const writeEscapedString(queryToString(codec), buf); } + if (stat) + { + writeChar('\t', buf); + writeEscapedString(queryToString(stat->ast), buf); + } + if (ttl) { writeChar('\t', buf); diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index ee0bb5efb66..9d92984d5d8 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -83,6 +84,7 @@ struct ColumnDescription String comment; ASTPtr codec; ASTPtr ttl; + std::optional stat; ColumnDescription() = default; ColumnDescription(ColumnDescription &&) = default; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index ae7659e074f..47b0cb0df2a 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -222,6 +223,11 @@ StorageID IStorage::getStorageID() const return storage_id; } +ConditionEstimator IStorage::getConditionEstimatorByPredicate(const SelectQueryInfo &, ContextPtr) const +{ + return {}; +} + void IStorage::renameInMemory(const StorageID & new_table_id) { std::lock_guard lock(id_mutex); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 9e95f4edee1..40fdbe4a68c 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -69,6 +68,8 @@ using DatabaseAndTableName = std::pair; class BackupEntriesCollector; class RestorerFromBackup; +class ConditionEstimator; + struct ColumnSize { size_t marks = 0; @@ -136,7 +137,7 @@ public: /// Returns true if the storage supports queries with the PREWHERE section. virtual bool supportsPrewhere() const { return false; } - virtual ConditionEstimator getConditionEstimatorByPredicate(const SelectQueryInfo &, ContextPtr) const { return {}; } + virtual ConditionEstimator getConditionEstimatorByPredicate(const SelectQueryInfo &, ContextPtr) const; /// Returns which columns supports PREWHERE, or empty std::nullopt if all columns is supported. 
/// This is needed for engines whose aggregates data from multiple tables, like Merge. diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index c412744b18b..440e1e1b06d 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -638,7 +638,7 @@ Statistics IMergeTreeDataPart::loadStatistics() const { const auto & metadata_snaphost = storage.getInMemoryMetadata(); - auto total_statistics = MergeTreeStatisticFactory::instance().getMany(metadata_snaphost.getStatistics()); + auto total_statistics = MergeTreeStatisticFactory::instance().getMany(metadata_snaphost.getColumns()); Statistics result; for (auto & stat : total_statistics) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 30887ae1b7c..1c98c15107d 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -366,7 +366,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->metadata_snapshot, global_ctx->merging_columns, MergeTreeIndexFactory::instance().getMany(global_ctx->metadata_snapshot->getSecondaryIndices()), - MergeTreeStatisticFactory::instance().getMany(global_ctx->metadata_snapshot->getStatistics()), + MergeTreeStatisticFactory::instance().getMany(global_ctx->metadata_snapshot->getColumns()), ctx->compression_codec, global_ctx->txn, /*reset_columns=*/ true, diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 9d43caa4765..3042eac075d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -651,15 +651,10 @@ void MergeTreeData::checkProperties( } } - if (!new_metadata.statistics.empty()) + for (const auto & col : new_metadata.columns) { - for (const auto & stat : new_metadata.statistics) - { - auto column = all_columns.tryGetByName(stat.column_name); - if (!column.has_value()) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "column {} is not found", stat.column_name); - MergeTreeStatisticFactory::instance().validate(stat, column->type); - } + if (col.stat) + MergeTreeStatisticFactory::instance().validate(*col.stat, col.type); } checkKeyExpression(*new_sorting_key.expression, new_sorting_key.sample_block, "Sorting", allow_nullable_key); diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index 170ea797e70..42c36f2e510 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -94,7 +94,7 @@ MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & auto compression_codec = storage.getContext()->chooseCompressionCodec(0, 0); auto indices = MergeTreeIndexFactory::instance().getMany(metadata_snapshot->getSecondaryIndices()); - auto stats = MergeTreeStatisticFactory::instance().getMany(metadata_snapshot->getStatistics()); + auto stats = MergeTreeStatisticFactory::instance().getMany(metadata_snapshot->getColumns()); MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, indices, stats, compression_codec, NO_TRANSACTION_PTR); out.write(block); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index faa533bbc3d..5803cd6cc33 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -569,7 +569,7 @@ MergeTreeDataWriter::TemporaryPart 
MergeTreeDataWriter::writeTempPartImpl( metadata_snapshot, columns, indices, - MergeTreeStatisticFactory::instance().getMany(metadata_snapshot->getStatistics()), + MergeTreeStatisticFactory::instance().getMany(metadata_snapshot->getColumns()), compression_codec, context->getCurrentTransaction(), false, diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 91a4ea3f702..445317defc0 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -88,7 +88,9 @@ static void splitAndModifyMutationCommands( if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN) mutated_columns.emplace(command.column_name); } - else if (command.type == MutationCommand::Type::DROP_INDEX || command.type == MutationCommand::Type::DROP_PROJECTION) + else if (command.type == MutationCommand::Type::DROP_INDEX + || command.type == MutationCommand::Type::DROP_PROJECTION + || command.type == MutationCommand::Type::DROP_STATISTIC) { for_file_renames.push_back(command); } @@ -204,7 +206,9 @@ static void splitAndModifyMutationCommands( { for_interpreter.push_back(command); } - else if (command.type == MutationCommand::Type::DROP_INDEX || command.type == MutationCommand::Type::DROP_PROJECTION) + else if (command.type == MutationCommand::Type::DROP_INDEX + || command.type == MutationCommand::Type::DROP_PROJECTION + || command.type == MutationCommand::Type::DROP_STATISTIC) { for_file_renames.push_back(command); } @@ -456,12 +460,12 @@ static std::set getStatisticsToRecalculate(const StorageMetadataPt { const auto & stats_factory = MergeTreeStatisticFactory::instance(); std::set stats_to_recalc; - const auto & stats = metadata_snapshot->getStatistics(); - for (const auto & stat_desc : stats) + const auto & columns = metadata_snapshot->getColumns(); + for (const auto & col_desc : columns) { - if (materialized_stats.contains(stat_desc.column_name)) + if (col_desc.stat && materialized_stats.contains(col_desc.name)) { - stats_to_recalc.insert(stats_factory.get(stat_desc)); + stats_to_recalc.insert(stats_factory.get(*col_desc.stat)); } } return stats_to_recalc; @@ -667,11 +671,11 @@ static NameToNameVector collectFilesForRenames( if (source_part->checksums.has(command.column_name + ".proj")) add_rename(command.column_name + ".proj", ""); } - //else if (command.type == MutationCommand::Type::DROP_STATISTICS) - //{ - // if (source_part->checksums.has(command.column_name + ".stat")) - // add_rename(command.column_name + ".stat", ""); - //} + else if (command.type == MutationCommand::Type::DROP_STATISTIC) + { + if (source_part->checksums.has(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX)) + add_rename(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX, ""); + } else if (isWidePart(source_part)) { if (command.type == MutationCommand::Type::DROP_COLUMN) @@ -1315,10 +1319,14 @@ private: NameSet entries_to_hardlink; NameSet removed_indices; + NameSet removed_stats; for (const auto & command : ctx->for_file_renames) { if (command.type == MutationCommand::DROP_INDEX) removed_indices.insert(command.column_name); + else if (command.type == MutationCommand::DROP_STATISTIC) + for (const auto & column_name : command.statistic_columns) + removed_stats.insert(column_name); } bool is_full_part_storage = isFullPartStorage(ctx->new_data_part->getDataPartStorage()); @@ -1354,17 +1362,23 @@ private: } } + for (auto stat_name : removed_stats) + LOG_INFO(ctx->log, "removed stat {}", stat_name); + Statistics stats; - const auto & statistics = 
ctx->metadata_snapshot->getStatistics(); - for (const auto & stat : statistics) + const auto & columns = ctx->metadata_snapshot->getColumns(); + for (const auto & col : columns) { - if (ctx->materialized_statistics.contains(stat.column_name)) + if (!col.stat || removed_stats.contains(col.name)) + continue; + + if (ctx->materialized_statistics.contains(col.name)) { - stats.push_back(MergeTreeStatisticFactory::instance().get(stat)); + stats.push_back(MergeTreeStatisticFactory::instance().get(*col.stat)); } else { - auto prefix = fmt::format("{}{}.", STAT_FILE_PREFIX, stat.column_name); + auto prefix = fmt::format("{}{}.", STAT_FILE_PREFIX, col.name); auto it = ctx->source_part->checksums.files.upper_bound(prefix); while (it != ctx->source_part->checksums.files.end()) { @@ -1378,6 +1392,9 @@ private: } } + for (auto hardlink : entries_to_hardlink) + LOG_INFO(ctx->log, "hard link {}", hardlink); + NameSet removed_projections; for (const auto & command : ctx->for_file_renames) { @@ -1639,6 +1656,10 @@ private: } } + LOG_INFO(ctx->log, "hard link size : {}", hardlinked_files.size()); + for (auto name : hardlinked_files) + LOG_INFO(ctx->log, "hard link file : {}", name); + /// Tracking of hardlinked files required for zero-copy replication. /// We don't remove them when we delete last copy of source part because /// new part can use them. @@ -1941,7 +1962,6 @@ bool MutateTask::prepare() ctx->materialized_indices = ctx->interpreter->grabMaterializedIndices(); ctx->materialized_statistics = ctx->interpreter->grabMaterializedStatistics(); - LOG_INFO(ctx->log, "stats number {}", ctx->materialized_statistics.size()); ctx->materialized_projections = ctx->interpreter->grabMaterializedProjections(); ctx->mutating_pipeline_builder = ctx->interpreter->execute(); ctx->updated_header = ctx->interpreter->getUpdatedHeader(); diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index b299a7aa873..7ec88eb2e7b 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -580,12 +580,12 @@ static StoragePtr create(const StorageFactory::Arguments & args) for (auto & index : args.query.columns_list->indices->children) metadata.secondary_indices.push_back(IndexDescription::getIndexFromAST(index, columns, context)); - if (args.query.columns_list && args.query.columns_list->stats) - for (const auto & stat_ast : args.query.columns_list->stats->children) - { - auto stats = StatisticsDescriptions::getStatisticsFromAST(stat_ast, columns, args.getContext()); - metadata.statistics.merge(stats); - } + ///if (args.query.columns_list && args.query.columns_list->stats) + /// for (const auto & stat_ast : args.query.columns_list->stats->children) + /// { + /// auto stats = StatisticsDescriptions::getStatisticsFromAST(stat_ast, columns, args.getContext()); + /// metadata.statistics.merge(stats); + /// } if (args.query.columns_list && args.query.columns_list->projections) for (auto & projection_ast : args.query.columns_list->projections->children) diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index 46322f3ef05..9a2e66d9948 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -77,11 +77,7 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, res.type = MATERIALIZE_STATISTIC; res.partition = command->partition; res.predicate = nullptr; - for (const ASTPtr & column_ast : command->statistic_decl->as().columns->children) - 
{ - const auto & column = column_ast->as().getColumnName(); - res.statistic_columns.push_back(column); - } + res.statistic_columns = command->statistic_decl->as().getColumnNames(); return res; } else if (command->type == ASTAlterCommand::MATERIALIZE_PROJECTION) @@ -138,6 +134,18 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, res.clear = true; return res; } + else if (parse_alter_commands && command->type == ASTAlterCommand::DROP_STATISTIC) + { + MutationCommand res; + res.ast = command->ptr(); + res.type = MutationCommand::Type::DROP_STATISTIC; + if (command->partition) + res.partition = command->partition; + if (command->clear_index) + res.clear = true; + res.statistic_columns = command->statistic_decl->as().getColumnNames(); + return res; + } else if (parse_alter_commands && command->type == ASTAlterCommand::DROP_PROJECTION) { MutationCommand res; diff --git a/src/Storages/Statistic/Statistic.cpp b/src/Storages/Statistic/Statistic.cpp index b72a1780772..661312d4865 100644 --- a/src/Storages/Statistic/Statistic.cpp +++ b/src/Storages/Statistic/Statistic.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -204,11 +205,12 @@ StatisticPtr MergeTreeStatisticFactory::get(const StatisticDescription & stat) c return std::make_shared(stat); } -Statistics MergeTreeStatisticFactory::getMany(const StatisticsDescriptions & stats) const +Statistics MergeTreeStatisticFactory::getMany(const ColumnsDescription & columns) const { Statistics result; - for (const auto & stat : stats) - result.push_back(get(stat)); + for (const auto & col : columns) + if (col.stat) + result.push_back(get(*col.stat)); return result; } diff --git a/src/Storages/Statistic/Statistic.h b/src/Storages/Statistic/Statistic.h index ec912dcc39c..7c95fe48c11 100644 --- a/src/Storages/Statistic/Statistic.h +++ b/src/Storages/Statistic/Statistic.h @@ -4,10 +4,10 @@ #include #include #include +#include #include #include #include -#include "Common/Exception.h" #include #include @@ -105,6 +105,8 @@ public: } }; +class ColumnsDescription; + class MergeTreeStatisticFactory : private boost::noncopyable { public: @@ -118,7 +120,7 @@ public: StatisticPtr get(const StatisticDescription & stat) const; - Statistics getMany(const StatisticsDescriptions & stats) const; + Statistics getMany(const ColumnsDescription & columns) const; void registerCreator(StatisticType type, Creator creator); void registerValidator(StatisticType type, Validator validator); diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index 5d9ea6d670b..9fdfaf9b9b8 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -18,42 +19,35 @@ namespace DB namespace ErrorCodes { extern const int INCORRECT_QUERY; - extern const int ILLEGAL_STATISTIC; extern const int LOGICAL_ERROR; }; -StatisticType StatisticDescription::stringToType(String type) +StatisticType stringToType(String type) { if (type == "tdigest") return TDigest; throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type: {}", type); } -namespace -{ - -String typeToString(StatisticType type) +String StatisticDescription::getTypeName() const { if (type == TDigest) return "tdigest"; - return "unknown"; + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type: {}", type); } -} - -StatisticsDescriptions StatisticsDescriptions::getStatisticsFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, 
ContextPtr context) +std::vector StatisticDescription::getStatisticsFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns) { const auto * stat_definition = definition_ast->as(); if (!stat_definition) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create statistic from non ASTStatisticDeclaration AST"); - LOG_INFO(&Poco::Logger::get("stats_desc"), "stat_def is like {}", stat_definition->dumpTree()); - - StatisticsDescriptions stats; + std::vector stats; + stats.reserve(stat_definition->columns->children.size()); for (const auto & column_ast : stat_definition->columns->children) { StatisticDescription stat; - stat.type = StatisticDescription::stringToType(Poco::toLower(stat_definition->type)); + stat.type = stringToType(Poco::toLower(stat_definition->type)); String column_name = column_ast->as().name(); if (!columns.hasPhysical(column_name)) @@ -61,64 +55,39 @@ StatisticsDescriptions StatisticsDescriptions::getStatisticsFromAST(const ASTPtr const auto & column = columns.getPhysical(column_name); stat.column_name = column.name; - /// TODO: check if it is numeric. - stat.data_type = column.type; + + auto function_node = std::make_shared(); + function_node->name = "STATISTIC"; + function_node->arguments = std::make_shared(); + function_node->arguments->children.push_back(std::make_shared(stat_definition->type)); + function_node->children.push_back(function_node->arguments); + + stat.ast = function_node; + stats.push_back(stat); } if (stats.empty()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Empty statistic column list"); - LOG_INFO(&Poco::Logger::get("stats_desc"), "there are {} stats", stats.size()); - - UNUSED(context); - return stats; } -bool StatisticsDescriptions::has(const String & name) const +String queryToString(const IAST & query); + +StatisticDescription StatisticDescription::getStatisticFromColumnDeclaration(const ASTColumnDeclaration & column) { - for (const auto & statistic : *this) - if (statistic.column_name == name) - return true; - return false; -} + const auto & stat_type_list_ast = column.stat_type->as().arguments; + if (stat_type_list_ast->children.size() != 1) + throw Exception(ErrorCodes::INCORRECT_QUERY, "We expect only one statistic type for column {}", queryToString(column)); + const auto & stat_type = stat_type_list_ast->children[0]->as().name; -void StatisticsDescriptions::merge(const StatisticsDescriptions & other) -{ - /// Check duplicate - for (const auto & old_stat : * this) - for (const auto & new_stat : other) - if (old_stat.column_name == new_stat.column_name) - throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "Statistic column {} has existed", old_stat.column_name); - insert(end(), other.begin(), other.end()); -} + StatisticDescription stat; + stat.type = stringToType(Poco::toLower(stat_type)); + stat.column_name = column.name; + stat.ast = column.stat_type; -ASTPtr StatisticsDescriptions::getAST() const -{ - - auto list = std::make_shared(); - - for (const auto & stat : *this) - { - auto stat_ast = std::make_shared(); - auto cols_ast = std::make_shared(); - auto col_ast = std::make_shared(stat.column_name); - cols_ast->children.push_back(col_ast); - stat_ast->set(stat_ast->columns, cols_ast); - stat_ast->type = typeToString(stat.type); - - list->children.push_back(stat_ast); - } - return list; -} - -String StatisticsDescriptions::toString() const -{ - if (empty()) - return {}; - - return serializeAST(*getAST()); + return stat; } } diff --git a/src/Storages/StatisticsDescription.h b/src/Storages/StatisticsDescription.h index 
2949cac3245..0918433e553 100644 --- a/src/Storages/StatisticsDescription.h +++ b/src/Storages/StatisticsDescription.h @@ -1,8 +1,8 @@ #pragma once #include +#include #include -#include namespace DB { @@ -12,6 +12,8 @@ enum StatisticType TDigest = 0, }; +class ColumnsDescription; + struct StatisticDescription { /// the type of statistic, right now it's only tdigest. @@ -20,26 +22,15 @@ struct StatisticDescription /// Names of statistic columns String column_name; - /// Data types of statistic columns - DataTypePtr data_type; + ASTPtr ast; + + String getTypeName() const; StatisticDescription() = default; - static StatisticType stringToType(String type); -}; + static StatisticDescription getStatisticFromColumnDeclaration(const ASTColumnDeclaration & column); -struct StatisticsDescriptions : public std::vector -{ - /// Stat with name exists - bool has(const String & name) const; - /// merge with other Statistics - void merge(const StatisticsDescriptions & other); - - ASTPtr getAST() const; - /// Convert description to string - String toString() const; - /// Parse description from string - static StatisticsDescriptions getStatisticsFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context); + static std::vector getStatisticsFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns); }; } diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index fdaeb87f3e0..af285a953dc 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -4,7 +4,6 @@ #include #include #include -#include "Storages/StatisticsDescription.h" #include #include #include @@ -29,7 +28,6 @@ namespace ErrorCodes StorageInMemoryMetadata::StorageInMemoryMetadata(const StorageInMemoryMetadata & other) : columns(other.columns) , secondary_indices(other.secondary_indices) - , statistics(other.statistics) , constraints(other.constraints) , projections(other.projections.clone()) , minmax_count_projection( @@ -54,7 +52,6 @@ StorageInMemoryMetadata & StorageInMemoryMetadata::operator=(const StorageInMemo columns = other.columns; secondary_indices = other.secondary_indices; - statistics = other.statistics; constraints = other.constraints; projections = other.projections.clone(); if (other.minmax_count_projection) @@ -94,11 +91,6 @@ void StorageInMemoryMetadata::setSecondaryIndices(IndicesDescription secondary_i secondary_indices = std::move(secondary_indices_); } -void StorageInMemoryMetadata::setStatistics(StatisticsDescriptions statistics_) -{ - statistics = std::move(statistics_); -} - void StorageInMemoryMetadata::setConstraints(ConstraintsDescription constraints_) { constraints = std::move(constraints_); @@ -154,11 +146,6 @@ const IndicesDescription & StorageInMemoryMetadata::getSecondaryIndices() const return secondary_indices; } -const StatisticsDescriptions & StorageInMemoryMetadata::getStatistics() const -{ - return statistics; -} - bool StorageInMemoryMetadata::hasSecondaryIndices() const { return !secondary_indices.empty(); diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 1244737ccc6..30b2b303492 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -25,8 +24,6 @@ struct StorageInMemoryMetadata ColumnsDescription columns; /// Table indices. Currently supported for MergeTree only. 
IndicesDescription secondary_indices; - - StatisticsDescriptions statistics; /// Table constraints. Currently supported for MergeTree only. ConstraintsDescription constraints; /// Table projections. Currently supported for MergeTree only. @@ -78,9 +75,6 @@ struct StorageInMemoryMetadata /// Sets secondary indices void setSecondaryIndices(IndicesDescription secondary_indices_); - /// Sets statistics - void setStatistics(StatisticsDescriptions statistics_); - /// Sets constraints void setConstraints(ConstraintsDescription constraints_); @@ -111,8 +105,6 @@ struct StorageInMemoryMetadata /// Returns secondary indices const IndicesDescription & getSecondaryIndices() const; - const StatisticsDescriptions & getStatistics() const; - /// Has at least one non primary index bool hasSecondaryIndices() const; diff --git a/tests/integration/test_manipulate_statistic/__init__.py b/tests/integration/test_manipulate_statistic/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_manipulate_statistic/config/config.xml b/tests/integration/test_manipulate_statistic/config/config.xml new file mode 100644 index 00000000000..b47f8123499 --- /dev/null +++ b/tests/integration/test_manipulate_statistic/config/config.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_manipulate_statistic/test.py b/tests/integration/test_manipulate_statistic/test.py new file mode 100644 index 00000000000..c48bfac19c8 --- /dev/null +++ b/tests/integration/test_manipulate_statistic/test.py @@ -0,0 +1,78 @@ +import pytest +import logging + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance("node1", user_configs=["config/config.xml"], with_zookeeper=True) + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + +def check_stat_file_on_disk(node, table, part_name, column_name, exist): + part_path = node.query( + "SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format( + table, part_name + ) + ).strip() + output = node.exec_in_container( + [ + "bash", + "-c", + "find {p} -type f -name statistic_{col}.stat".format( + p=part_path, col=column_name + ), + ], + privileged=True, + ) + logging.debug(f"stat file ls in {part_path} for column {column_name}, shows {output}") + if exist: + assert len(output) != 0 + else: + assert len(output) == 0 + +def test_single_node(started_cluster): + node1.query("DROP TABLE IF EXISTS test_stat") + + node1.query(""" + CREATE TABLE test_stat(a Int64 STATISTIC(tdigest), b Int64 STATISTIC(tdigest), c Int64 STATISTIC(tdigest)) + ENGINE = MergeTree() ORDER BY a; + """) + + node1.query("INSERT INTO test_stat VALUES (1,2,3), (4,5,6)") + + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0", "a", True) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0", "b", True) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0", "c", True) + + node1.query("ALTER TABLE test_stat DROP STATISTIC a type tdigest") + + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_2", "a", False) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_2", "b", True) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_2", "c", True) + + node1.query("ALTER TABLE test_stat CLEAR STATISTIC b, c type tdigest") + + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_3", "a", False) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_3", "b", False) + check_stat_file_on_disk(node1, 
"test_stat", "all_1_1_0_3", "c", False) + + node1.query("ALTER TABLE test_stat MATERIALIZE STATISTIC b, c type tdigest") + + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_2", "a", False) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_2", "b", True) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_2", "c", True) + + node1.query("ALTER TABLE test_stat ADD STATISTIC a type tdigest") + + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0", "a", True) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0", "b", True) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0", "c", True) diff --git a/tests/queries/0_stateless/02864_statistic_exception.sql b/tests/queries/0_stateless/02864_statistic_exception.sql index 58b43768679..32ac2021d29 100644 --- a/tests/queries/0_stateless/02864_statistic_exception.sql +++ b/tests/queries/0_stateless/02864_statistic_exception.sql @@ -2,29 +2,25 @@ DROP TABLE IF EXISTS t1; CREATE TABLE t1 ( - a Float64, - b Int64, + a Float64 STATISTIC(tdigest), + b Int64 STATISTIC(tdigest), pk String, - STATISTIC a, b TYPE tdigest, ) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } SET allow_experimental_statistic = 1; CREATE TABLE t1 ( - a Float64, + a Float64 STATISTIC(tdigest), b Int64, - pk String, - STATISTIC a, a TYPE tdigest, + pk String STATISTIC(tdigest), ) Engine = MergeTree() ORDER BY pk; -- { serverError ILLEGAL_STATISTIC } CREATE TABLE t1 ( - a Float64, + a Float64 STATISTIC(tdigest, tdigest(10)), b Int64, - pk String, - STATISTIC a, pk TYPE tdigest, -) Engine = MergeTree() ORDER BY pk; -- { serverError ILLEGAL_STATISTIC } +) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } CREATE TABLE t1 ( @@ -34,9 +30,12 @@ CREATE TABLE t1 ) Engine = MergeTree() ORDER BY pk; ALTER TABLE t1 ADD STATISTIC a TYPE tdigest; -ALTER TABLE t1 ADD STATISTIC a TYPE tdigest; -- { serverError INCORRECT_QUERY } -ALTER TABLE t1 ADD STATISTIC pk tdigest; -- { serverError ILLEGAL_STATISTIC } -ALTER TABLE t1 DROP STATISTIC b TYPE tdigest; -- { serverError INCORRECT_QUERY } -ALTER TABLE t1 MATERIALIZE STATISTIC b TYPE tdigest; -- { serverError INCORRECT_QUERY } +ALTER TABLE t1 ADD STATISTIC a TYPE tdigest; -- { serverError ILLEGAL_STATISTIC } +ALTER TABLE t1 ADD STATISTIC pk TYPE tdigest; -- { serverError ILLEGAL_STATISTIC } +ALTER TABLE t1 DROP STATISTIC b TYPE tdigest; -- { serverError ILLEGAL_STATISTIC } +ALTER TABLE t1 DROP STATISTIC a TYPE tdigest; +ALTER TABLE t1 DROP STATISTIC a TYPE tdigest; -- { serverError ILLEGAL_STATISTIC } +ALTER TABLE t1 CLEAR STATISTIC a TYPE tdigest; -- { serverError ILLEGAL_STATISTIC } +ALTER TABLE t1 MATERIALIZE STATISTIC b TYPE tdigest; -- { serverError ILLEGAL_STATISTIC } DROP TABLE t1; diff --git a/tests/queries/0_stateless/02864_statistic_operate.reference b/tests/queries/0_stateless/02864_statistic_operate.reference index 67f9eb6de15..2726064be00 100644 --- a/tests/queries/0_stateless/02864_statistic_operate.reference +++ b/tests/queries/0_stateless/02864_statistic_operate.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.t1\n(\n `a` Float64,\n `b` Int64,\n `pk` String,\n STATISTIC a TYPE tdigest,\n STATISTIC b TYPE tdigest\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +CREATE TABLE default.t1\n(\n `a` Float64 STATISTIC(tdigest),\n `b` Int64 STATISTIC(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 After insert SELECT count() FROM t1 @@ -12,7 +12,7 @@ 
PREWHERE (b < 10) AND (a < 10) 10 CREATE TABLE default.t1\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 After add statistic -CREATE TABLE default.t1\n(\n `a` Float64,\n `b` Int64,\n `pk` String,\n STATISTIC a TYPE tdigest,\n STATISTIC b TYPE tdigest\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +CREATE TABLE default.t1\n(\n `a` Float64 STATISTIC(tdigest),\n `b` Int64 STATISTIC(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 After materialize statistic SELECT count() FROM t1 diff --git a/tests/queries/0_stateless/02864_statistic_operate.sql b/tests/queries/0_stateless/02864_statistic_operate.sql index 66fc4a9ec34..e0a13afab84 100644 --- a/tests/queries/0_stateless/02864_statistic_operate.sql +++ b/tests/queries/0_stateless/02864_statistic_operate.sql @@ -5,10 +5,9 @@ SET allow_statistic_optimize = 1; CREATE TABLE t1 ( - a Float64, - b Int64, + a Float64 STATISTIC(tdigest), + b Int64 STATISTIC(tdigest), pk String, - STATISTIC a, b TYPE tdigest, ) Engine = MergeTree() ORDER BY pk SETTINGS min_bytes_for_wide_part = 0; From 6c4474d3d123979723a81314b201087443050680 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Tue, 26 Sep 2023 22:22:11 +0200 Subject: [PATCH 0049/1097] refine code structure --- .../sql-reference/statements/alter/index.md | 1 + .../statements/alter/statistic.md | 25 ++++ src/Storages/IStorage.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 1 + .../MergeTree/MergeTreeWhereOptimizer.h | 2 +- src/Storages/Statistic/Estimator.cpp | 135 ++++++++++++++++++ src/Storages/Statistic/Estimator.h | 106 ++++++++++++++ src/Storages/Statistic/Statistic.cpp | 129 ----------------- src/Storages/Statistic/Statistic.h | 101 ------------- 9 files changed, 270 insertions(+), 232 deletions(-) create mode 100644 docs/en/sql-reference/statements/alter/statistic.md create mode 100644 src/Storages/Statistic/Estimator.cpp create mode 100644 src/Storages/Statistic/Estimator.h diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index dca34d16f25..e76e42a424c 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -16,6 +16,7 @@ Most `ALTER TABLE` queries modify table settings or data: - [INDEX](/docs/en/sql-reference/statements/alter/skipping-index.md) - [CONSTRAINT](/docs/en/sql-reference/statements/alter/constraint.md) - [TTL](/docs/en/sql-reference/statements/alter/ttl.md) +- [STATISTIC](../../../sql-reference/statements/alter/statistic.md) :::note Most `ALTER TABLE` queries are supported only for [\*MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](/docs/en/engines/table-engines/special/merge.md) and [Distributed](/docs/en/engines/table-engines/special/distributed.md). diff --git a/docs/en/sql-reference/statements/alter/statistic.md b/docs/en/sql-reference/statements/alter/statistic.md new file mode 100644 index 00000000000..a4719325997 --- /dev/null +++ b/docs/en/sql-reference/statements/alter/statistic.md @@ -0,0 +1,25 @@ +--- +toc_hidden_folder: true +sidebar_position: 42 +sidebar_label: STATISTIC +--- + +# Manipulating Column Statistics + +The following operations are available: + +- `ALTER TABLE [db].table ADD STATISTIC (columns list) TYPE type` - Adds statistic description to tables metadata. 
+ +- `ALTER TABLE [db].table DROP STATISTIC (columns list) TYPE type` - Removes statistic description from tables metadata and deletes statistic files from disk. + +- `ALTER TABLE [db].table CLEAR STATISTIC (columns list) TYPE type` - Deletes statistic files from disk. + +- `ALTER TABLE [db.]table MATERIALIZE STATISTIC (columns list) TYPE type` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../../sql-reference/statements/alter/index.md#mutations). + +The first two commands are lightweight in a sense that they only change metadata or remove files. + +Also, they are replicated, syncing statistics metadata via ZooKeeper. + +:::note +Statistic manipulation is supported only for tables with [`*MergeTree`](../../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../../engines/table-engines/mergetree-family/replication.md) variants). +::: diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 47b0cb0df2a..2c9e339cb48 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 290f1d1d987..d909d5adef5 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index b08fd627a29..0e0205b2b1e 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -4,9 +4,9 @@ #include #include #include +#include #include -#include "Storages/Statistic/Statistic.h" #include #include diff --git a/src/Storages/Statistic/Estimator.cpp b/src/Storages/Statistic/Estimator.cpp new file mode 100644 index 00000000000..031a38a4171 --- /dev/null +++ b/src/Storages/Statistic/Estimator.cpp @@ -0,0 +1,135 @@ +#include +#include + +namespace DB +{ + +std::optional ConditionEstimator::extractSingleColumn(const RPNBuilderTreeNode & node) const +{ + if (node.isConstant()) + { + return std::nullopt; + } + + if (!node.isFunction()) + { + auto column_name = node.getColumnName(); + return {column_name}; + } + + auto function_node = node.toFunctionNode(); + size_t arguments_size = function_node.getArgumentsSize(); + std::optional result; + for (size_t i = 0; i < arguments_size; ++i) + { + auto function_argument = function_node.getArgumentAt(i); + auto subresult = extractSingleColumn(function_argument); + if (subresult == std::nullopt) + continue; + else if (subresult == "") + return ""; + else if (result == std::nullopt) + result = subresult; + else if (result.value() != subresult.value()) + return ""; + } + return result; +} + +std::pair ConditionEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const std::string & column_name) const +{ + if (!node.isFunction()) + return {}; + + auto function_node = node.toFunctionNode(); + if (function_node.getArgumentsSize() != 2) + return {}; + + std::string function_name = function_node.getFunctionName(); + + auto lhs_argument = function_node.getArgumentAt(0); + auto rhs_argument = function_node.getArgumentAt(1); + + auto lhs_argument_column_name = lhs_argument.getColumnName(); + auto rhs_argument_column_name = rhs_argument.getColumnName(); + + bool lhs_argument_is_column = column_name == (lhs_argument_column_name); + 
bool rhs_argument_is_column = column_name == (rhs_argument_column_name); + + bool lhs_argument_is_constant = lhs_argument.isConstant(); + bool rhs_argument_is_constant = rhs_argument.isConstant(); + + RPNBuilderTreeNode * constant_node = nullptr; + + if (lhs_argument_is_column && rhs_argument_is_constant) + constant_node = &rhs_argument; + else if (lhs_argument_is_constant && rhs_argument_is_column) + constant_node = &lhs_argument; + else + return {}; + + Field output_value; + DataTypePtr output_type; + if (!constant_node->tryGetConstant(output_value, output_type)) + return {}; + + const auto type = output_value.getType(); + Float64 value; + if (type == Field::Types::Int64) + value = output_value.get(); + else if (type == Field::Types::UInt64) + value = output_value.get(); + else if (type == Field::Types::Float64) + value = output_value.get(); + else + return {}; + return std::make_pair(function_name, value); +} + +Float64 ConditionEstimator::estimateSelectivity(const RPNBuilderTreeNode & node) const +{ + auto col = extractSingleColumn(node); + if (col == std::nullopt || col == "") + { + return default_unknown_cond_factor; + } + auto it = column_estimators.find(col.value()); + + /// If there the estimator of the column is not found or there are no data at all, + /// we use dummy estimation. + bool dummy = total_count == 0; + ColumnEstimator estimator; + if (it != column_estimators.end()) + { + estimator = it->second; + } + else + { + dummy = true; + } + auto [op, val] = extractBinaryOp(node, col.value()); + if (op == "equals") + { + if (val < - threshold || val > threshold) + return default_normal_cond_factor; + else + return default_good_cond_factor; + } + else if (op == "less" || op == "lessThan") + { + if (dummy) + return default_normal_cond_factor; + return estimator.estimateLess(val) / total_count; + } + else if (op == "greater" || op == "greaterThan") + { + if (dummy) + return default_normal_cond_factor; + return estimator.estimateGreater(val) / total_count; + } + else + return default_unknown_cond_factor; +} + + +} diff --git a/src/Storages/Statistic/Estimator.h b/src/Storages/Statistic/Estimator.h new file mode 100644 index 00000000000..a52351f4879 --- /dev/null +++ b/src/Storages/Statistic/Estimator.h @@ -0,0 +1,106 @@ +#pragma once + +#include + +namespace DB +{ + +class RPNBuilderTreeNode; + +class ConditionEstimator +{ +private: + + static constexpr auto default_good_cond_factor = 0.1; + static constexpr auto default_normal_cond_factor = 0.5; + static constexpr auto default_unknown_cond_factor = 1.0; + /// Conditions like "x = N" are considered good if abs(N) > threshold. + /// This is used to assume that condition is likely to have good selectivity. 
+ static constexpr auto threshold = 2; + + UInt64 total_count = 0; + + struct PartColumnEstimator + { + UInt64 part_count = 0; + + std::shared_ptr t_digest; + + void merge(StatisticPtr statistic) + { + UInt64 cur_part_count = statistic->count(); + if (part_count == 0) + part_count = cur_part_count; + + if (typeid_cast(statistic.get())) + { + t_digest = std::static_pointer_cast(statistic); + } + } + + Float64 estimateLess(Float64 val) const + { + if (t_digest != nullptr) + return t_digest -> estimateLess(val); + return part_count * default_normal_cond_factor; + } + + Float64 estimateGreator(Float64 val) const + { + if (t_digest != nullptr) + return part_count - t_digest -> estimateLess(val); + return part_count * default_normal_cond_factor; + } + }; + + struct ColumnEstimator + { + std::map estimators; + + void merge(std::string part_name, StatisticPtr statistic) + { + estimators[part_name].merge(statistic); + } + + Float64 estimateLess(Float64 val) const + { + if (estimators.empty()) + return default_normal_cond_factor; + Float64 result = 0; + for (const auto & [key, estimator] : estimators) + result += estimator.estimateLess(val); + return result; + } + + Float64 estimateGreater(Float64 val) const + { + if (estimators.empty()) + return default_normal_cond_factor; + Float64 result = 0; + for (const auto & [key, estimator] : estimators) + result += estimator.estimateGreator(val); + return result; + } + }; + + std::map column_estimators; + std::optional extractSingleColumn(const RPNBuilderTreeNode & node) const; + std::pair extractBinaryOp(const RPNBuilderTreeNode & node, const std::string & column_name) const; + +public: + ConditionEstimator() = default; + + /// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ... + /// Right now we only support simple condition like col = val / col < val + Float64 estimateSelectivity(const RPNBuilderTreeNode & node) const; + + void merge(std::string part_name, UInt64 part_count, StatisticPtr statistic) + { + total_count += part_count; + if (statistic != nullptr) + column_estimators[statistic->columnName()].merge(part_name, statistic); + } +}; + + +} diff --git a/src/Storages/Statistic/Statistic.cpp b/src/Storages/Statistic/Statistic.cpp index 661312d4865..311172bffc0 100644 --- a/src/Storages/Statistic/Statistic.cpp +++ b/src/Storages/Statistic/Statistic.cpp @@ -6,7 +6,6 @@ #include #include #include -#include namespace DB { @@ -18,134 +17,6 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTIC; } - -std::optional ConditionEstimator::extractSingleColumn(const RPNBuilderTreeNode & node) const -{ - if (node.isConstant()) - { - return std::nullopt; - } - - if (!node.isFunction()) - { - auto column_name = node.getColumnName(); - return {column_name}; - } - - auto function_node = node.toFunctionNode(); - size_t arguments_size = function_node.getArgumentsSize(); - std::optional result; - for (size_t i = 0; i < arguments_size; ++i) - { - auto function_argument = function_node.getArgumentAt(i); - auto subresult = extractSingleColumn(function_argument); - if (subresult == std::nullopt) - continue; - else if (subresult == "") - return ""; - else if (result == std::nullopt) - result = subresult; - else if (result.value() != subresult.value()) - return ""; - } - return result; -} - -std::pair ConditionEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const std::string & column_name) const -{ - if (!node.isFunction()) - return {}; - - auto function_node = node.toFunctionNode(); - if (function_node.getArgumentsSize() != 2) 
- return {}; - - std::string function_name = function_node.getFunctionName(); - - auto lhs_argument = function_node.getArgumentAt(0); - auto rhs_argument = function_node.getArgumentAt(1); - - auto lhs_argument_column_name = lhs_argument.getColumnName(); - auto rhs_argument_column_name = rhs_argument.getColumnName(); - - bool lhs_argument_is_column = column_name == (lhs_argument_column_name); - bool rhs_argument_is_column = column_name == (rhs_argument_column_name); - - bool lhs_argument_is_constant = lhs_argument.isConstant(); - bool rhs_argument_is_constant = rhs_argument.isConstant(); - - RPNBuilderTreeNode * constant_node = nullptr; - - if (lhs_argument_is_column && rhs_argument_is_constant) - constant_node = &rhs_argument; - else if (lhs_argument_is_constant && rhs_argument_is_column) - constant_node = &lhs_argument; - else - return {}; - - Field output_value; - DataTypePtr output_type; - if (!constant_node->tryGetConstant(output_value, output_type)) - return {}; - - const auto type = output_value.getType(); - Float64 value; - if (type == Field::Types::Int64) - value = output_value.get(); - else if (type == Field::Types::UInt64) - value = output_value.get(); - else if (type == Field::Types::Float64) - value = output_value.get(); - else - return {}; - return std::make_pair(function_name, value); -} - -Float64 ConditionEstimator::estimateSelectivity(const RPNBuilderTreeNode & node) const -{ - auto col = extractSingleColumn(node); - if (col == std::nullopt || col == "") - { - return default_unknown_cond_factor; - } - auto it = column_estimators.find(col.value()); - - /// If there the estimator of the column is not found or there are no data at all, - /// we use dummy estimation. - bool dummy = total_count == 0; - ColumnEstimator estimator; - if (it != column_estimators.end()) - { - estimator = it->second; - } - else - { - dummy = true; - } - auto [op, val] = extractBinaryOp(node, col.value()); - if (op == "equals") - { - if (val < - threshold || val > threshold) - return default_normal_cond_factor; - else - return default_good_cond_factor; - } - else if (op == "less" || op == "lessThan") - { - if (dummy) - return default_normal_cond_factor; - return estimator.estimateLess(val) / total_count; - } - else if (op == "greater" || op == "greaterThan") - { - if (dummy) - return default_normal_cond_factor; - return estimator.estimateGreater(val) / total_count; - } - else - return default_unknown_cond_factor; -} - StatisticPtr TDigestCreator(const StatisticDescription & stat) { /// TODO: check column data types. 
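The `ConditionEstimator` that moves into `Estimator.{h,cpp}` above works in three steps: a condition such as `a < 42` is reduced to a single column name, the comparison function and its constant are extracted, and every part's TDigest sketch for that column reports how many rows fall below the constant; the per-part counts are summed and divided by the total row count, with fixed fallback factors when no statistic is available. A minimal self-contained sketch of that flow follows; the type and variable names are invented for illustration and a naive counter stands in for `QuantileTDigest`, so this is a model of the idea rather than the patch's actual implementation.

```cpp
// Simplified model of per-part selectivity estimation (illustrative only, invented names).
#include <algorithm>
#include <iostream>
#include <map>
#include <string>
#include <vector>

/// Naive stand-in for QuantileTDigest: stores raw values and counts those below a threshold.
struct FakeSketch
{
    std::vector<double> values;
    void add(double v) { values.push_back(v); }
    double estimateLess(double x) const
    {
        return static_cast<double>(std::count_if(values.begin(), values.end(),
                                                  [&](double v) { return v < x; }));
    }
};

struct PartEstimate
{
    size_t rows = 0;
    FakeSketch sketch;
};

int main()
{
    /// Per-part estimates for one column "a", as the estimator would accumulate them via merge().
    std::map<std::string, PartEstimate> parts;
    for (double v : {1.0, 5.0, 9.0, 20.0})
        parts["all_1_1_0"].sketch.add(v);
    parts["all_1_1_0"].rows = 4;
    for (double v : {2.0, 3.0, 50.0})
        parts["all_2_2_0"].sketch.add(v);
    parts["all_2_2_0"].rows = 3;

    const size_t total_rows = 7;
    const double constant = 10.0;   /// estimating the selectivity of "a < 10"

    double matching_rows = 0;
    for (const auto & [part_name, part] : parts)
        matching_rows += part.sketch.estimateLess(constant);

    /// The real estimator falls back to constant factors here when a part has no sketch.
    std::cout << "estimated selectivity of a < 10: " << matching_rows / total_rows << '\n';  /// prints 5/7
    return 0;
}
```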
diff --git a/src/Storages/Statistic/Statistic.h b/src/Storages/Statistic/Statistic.h index 7c95fe48c11..0bb416b9feb 100644 --- a/src/Storages/Statistic/Statistic.h +++ b/src/Storages/Statistic/Statistic.h @@ -76,7 +76,6 @@ public: void serialize(WriteBuffer & buf) override { data.serialize(buf); - LOG_DEBUG(&Poco::Logger::get("t-digest"), "serialize into {} data", buf.offset()); } void deserialize(ReadBuffer & buf) override @@ -95,8 +94,6 @@ public: Float64 value = column_with_type.column->getFloat64(i); data.add(value, 1); } - - LOG_DEBUG(&Poco::Logger::get("t-digest"), "write into {} data", size); } UInt64 count() override @@ -135,102 +132,4 @@ private: Validators validators; }; -class RPNBuilderTreeNode; - -class ConditionEstimator -{ -private: - - static constexpr auto default_good_cond_factor = 0.1; - static constexpr auto default_normal_cond_factor = 0.5; - static constexpr auto default_unknown_cond_factor = 1.0; - /// Conditions like "x = N" are considered good if abs(N) > threshold. - /// This is used to assume that condition is likely to have good selectivity. - static constexpr auto threshold = 2; - - UInt64 total_count = 0; - - struct PartColumnEstimator - { - UInt64 part_count = 0; - - std::shared_ptr t_digest; - - void merge(StatisticPtr statistic) - { - UInt64 cur_part_count = statistic->count(); - if (part_count == 0) - part_count = cur_part_count; - - if (typeid_cast(statistic.get())) - { - t_digest = std::static_pointer_cast(statistic); - } - } - - Float64 estimateLess(Float64 val) const - { - if (t_digest != nullptr) - return t_digest -> estimateLess(val); - return part_count * default_normal_cond_factor; - } - - Float64 estimateGreator(Float64 val) const - { - if (t_digest != nullptr) - return part_count - t_digest -> estimateLess(val); - return part_count * default_normal_cond_factor; - } - }; - - struct ColumnEstimator - { - std::map estimators; - - void merge(std::string part_name, StatisticPtr statistic) - { - estimators[part_name].merge(statistic); - } - - Float64 estimateLess(Float64 val) const - { - if (estimators.empty()) - return default_normal_cond_factor; - Float64 result = 0; - for (const auto & [key, estimator] : estimators) - result += estimator.estimateLess(val); - return result; - } - - Float64 estimateGreater(Float64 val) const - { - if (estimators.empty()) - return default_normal_cond_factor; - Float64 result = 0; - for (const auto & [key, estimator] : estimators) - result += estimator.estimateGreator(val); - return result; - } - }; - - std::map column_estimators; - std::optional extractSingleColumn(const RPNBuilderTreeNode & node) const; - std::pair extractBinaryOp(const RPNBuilderTreeNode & node, const std::string & column_name) const; - -public: - ConditionEstimator() = default; - - /// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ... 
- /// Right now we only support simple condition like col = val / col < val - Float64 estimateSelectivity(const RPNBuilderTreeNode & node) const; - - void merge(std::string part_name, UInt64 part_count, StatisticPtr statistic) - { - total_count += part_count; - if (statistic != nullptr) - column_estimators[statistic->columnName()].merge(part_name, statistic); - } -}; - - } From 76b9d43dc44e33271500767ebf7d8dc09e8b650b Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 27 Sep 2023 00:48:47 +0200 Subject: [PATCH 0050/1097] make code beautiful --- .../table-engines/mergetree-family/mergetree.md | 2 +- docs/en/sql-reference/statements/alter/index.md | 2 +- .../sql-reference/statements/alter/statistic.md | 8 ++++---- src/Access/Common/AccessType.h | 2 +- src/Storages/MergeTree/MutateTask.cpp | 11 ----------- .../MergeTree/registerStorageMergeTree.cpp | 7 ------- src/Storages/MutationCommands.cpp | 1 - .../test_manipulate_statistic/test.py | 17 +++++++++++++---- 8 files changed, 20 insertions(+), 30 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index bf68153e2a2..16a9e977c6b 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -44,7 +44,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2], ... PROJECTION projection_name_1 (SELECT [GROUP BY] [ORDER BY]), - PROJECTION projection_name_2 (SELECT [GROUP BY] [ORDER BY]), + PROJECTION projection_name_2 (SELECT [GROUP BY] [ORDER BY]) ) ENGINE = MergeTree() ORDER BY expr [PARTITION BY expr] diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index e76e42a424c..d28542e0a43 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -16,7 +16,7 @@ Most `ALTER TABLE` queries modify table settings or data: - [INDEX](/docs/en/sql-reference/statements/alter/skipping-index.md) - [CONSTRAINT](/docs/en/sql-reference/statements/alter/constraint.md) - [TTL](/docs/en/sql-reference/statements/alter/ttl.md) -- [STATISTIC](../../../sql-reference/statements/alter/statistic.md) +- [STATISTIC](/docs/en/sql-reference/statements/alter/statistic.md) :::note Most `ALTER TABLE` queries are supported only for [\*MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](/docs/en/engines/table-engines/special/merge.md) and [Distributed](/docs/en/engines/table-engines/special/distributed.md). diff --git a/docs/en/sql-reference/statements/alter/statistic.md b/docs/en/sql-reference/statements/alter/statistic.md index a4719325997..1c2e45b23fd 100644 --- a/docs/en/sql-reference/statements/alter/statistic.md +++ b/docs/en/sql-reference/statements/alter/statistic.md @@ -1,6 +1,6 @@ --- -toc_hidden_folder: true -sidebar_position: 42 +slug: /en/sql-reference/statements/alter/statistic +sidebar_position: 45 sidebar_label: STATISTIC --- @@ -14,12 +14,12 @@ The following operations are available: - `ALTER TABLE [db].table CLEAR STATISTIC (columns list) TYPE type` - Deletes statistic files from disk. -- `ALTER TABLE [db.]table MATERIALIZE STATISTIC (columns list) TYPE type` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../../sql-reference/statements/alter/index.md#mutations). 
+- `ALTER TABLE [db.]table MATERIALIZE STATISTIC (columns list) TYPE type` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). The first two commands are lightweight in a sense that they only change metadata or remove files. Also, they are replicated, syncing statistics metadata via ZooKeeper. :::note -Statistic manipulation is supported only for tables with [`*MergeTree`](../../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../../engines/table-engines/mergetree-family/replication.md) variants). +Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). ::: diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 4b0ec66043d..2590a50b0dd 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -54,7 +54,7 @@ enum class AccessType M(ALTER_ADD_STATISTIC, "ALTER ADD STATISTIC", TABLE, ALTER_STATISTIC) \ M(ALTER_DROP_STATISTIC, "ALTER DROP STATISTIC", TABLE, ALTER_STATISTIC) \ M(ALTER_MATERIALIZE_STATISTIC, "ALTER MATERIALIZE STATISTIC", TABLE, ALTER_STATISTIC) \ - M(ALTER_STATISTIC, "STATISTIC", GROUP, ALTER_TABLE) /* allows to execute ALTER ORDER BY or ALTER {ADD|DROP...} INDEX */\ + M(ALTER_STATISTIC, "STATISTIC", GROUP, ALTER_TABLE) /* allows to execute ALTER STATISTIC */\ \ M(ALTER_ADD_PROJECTION, "ADD PROJECTION", TABLE, ALTER_PROJECTION) \ M(ALTER_DROP_PROJECTION, "DROP PROJECTION", TABLE, ALTER_PROJECTION) \ diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f9099cab08b..ebe34aa0702 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1377,9 +1377,6 @@ private: } } - for (auto stat_name : removed_stats) - LOG_INFO(ctx->log, "removed stat {}", stat_name); - Statistics stats; const auto & columns = ctx->metadata_snapshot->getColumns(); for (const auto & col : columns) @@ -1407,9 +1404,6 @@ private: } } - for (auto hardlink : entries_to_hardlink) - LOG_INFO(ctx->log, "hard link {}", hardlink); - NameSet removed_projections; for (const auto & command : ctx->for_file_renames) { @@ -1498,7 +1492,6 @@ private: ctx->minmax_idx = std::make_shared(); - LOG_TRACE(ctx->log, "going to write {} stats", stats.size()); ctx->out = std::make_shared( ctx->new_data_part, ctx->metadata_snapshot, @@ -1671,10 +1664,6 @@ private: } } - LOG_INFO(ctx->log, "hard link size : {}", hardlinked_files.size()); - for (auto name : hardlinked_files) - LOG_INFO(ctx->log, "hard link file : {}", name); - /// Tracking of hardlinked files required for zero-copy replication. /// We don't remove them when we delete last copy of source part because /// new part can use them. 
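The `MutateTask.cpp` hunks in this series all revolve around one per-column decision: when a part is rewritten, the on-disk statistic file (named `statistic_<column>.stat`, i.e. `STAT_FILE_PREFIX` + column name + `STAT_FILE_SUFFIX`) is either rebuilt, hard-linked unchanged from the source part, or not carried over at all. The sketch below is a rough model of that decision table; the function and container names are invented, and the real code works against the part's checksum map and file renames rather than plain sets.

```cpp
// Illustrative decision table for statistic files during a mutation (not the real MutateTask code).
#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

static const std::string STAT_FILE_PREFIX = "statistic_";
static const std::string STAT_FILE_SUFFIX = ".stat";

enum class Action { Rebuild, Hardlink, Drop };

Action decideStatFile(const std::string & column,
                      bool column_declares_stat,                  /// column still exists and has STATISTIC(...)
                      const std::set<std::string> & dropped,      /// statistics being removed (DROP/CLEAR STATISTIC)
                      const std::set<std::string> & materialized) /// targets of MATERIALIZE STATISTIC
{
    if (!column_declares_stat || dropped.contains(column))
        return Action::Drop;      /// file is not renamed/hard-linked into the new part
    if (materialized.contains(column))
        return Action::Rebuild;   /// recomputed while the mutation rewrites the column data
    return Action::Hardlink;      /// untouched: reuse the source part's file
}

int main()
{
    const std::set<std::string> dropped = {"a"};
    const std::set<std::string> materialized = {"b"};
    const std::vector<std::pair<std::string, bool>> columns = {{"a", true}, {"b", true}, {"c", true}, {"d", false}};

    for (const auto & [name, has_stat] : columns)
    {
        Action action = decideStatFile(name, has_stat, dropped, materialized);
        std::cout << STAT_FILE_PREFIX << name << STAT_FILE_SUFFIX << " -> "
                  << (action == Action::Drop ? "drop" : action == Action::Rebuild ? "rebuild" : "hardlink") << '\n';
    }
    return 0;
}
```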
diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 7ec88eb2e7b..0a182789311 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -580,13 +580,6 @@ static StoragePtr create(const StorageFactory::Arguments & args) for (auto & index : args.query.columns_list->indices->children) metadata.secondary_indices.push_back(IndexDescription::getIndexFromAST(index, columns, context)); - ///if (args.query.columns_list && args.query.columns_list->stats) - /// for (const auto & stat_ast : args.query.columns_list->stats->children) - /// { - /// auto stats = StatisticsDescriptions::getStatisticsFromAST(stat_ast, columns, args.getContext()); - /// metadata.statistics.merge(stats); - /// } - if (args.query.columns_list && args.query.columns_list->projections) for (auto & projection_ast : args.query.columns_list->projections->children) { diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index 9a2e66d9948..03200d0d9fa 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -11,7 +11,6 @@ #include #include #include -#include "Parsers/ASTAlterQuery.h" #include #include diff --git a/tests/integration/test_manipulate_statistic/test.py b/tests/integration/test_manipulate_statistic/test.py index c48bfac19c8..f3f059ce4a2 100644 --- a/tests/integration/test_manipulate_statistic/test.py +++ b/tests/integration/test_manipulate_statistic/test.py @@ -5,7 +5,10 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance("node1", user_configs=["config/config.xml"], with_zookeeper=True) +node1 = cluster.add_instance( + "node1", user_configs=["config/config.xml"], with_zookeeper=True +) + @pytest.fixture(scope="module") def started_cluster(): @@ -17,6 +20,7 @@ def started_cluster(): finally: cluster.shutdown() + def check_stat_file_on_disk(node, table, part_name, column_name, exist): part_path = node.query( "SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format( @@ -33,19 +37,24 @@ def check_stat_file_on_disk(node, table, part_name, column_name, exist): ], privileged=True, ) - logging.debug(f"stat file ls in {part_path} for column {column_name}, shows {output}") + logging.debug( + f"stat file ls in {part_path} for column {column_name}, shows {output}" + ) if exist: assert len(output) != 0 else: assert len(output) == 0 + def test_single_node(started_cluster): node1.query("DROP TABLE IF EXISTS test_stat") - node1.query(""" + node1.query( + """ CREATE TABLE test_stat(a Int64 STATISTIC(tdigest), b Int64 STATISTIC(tdigest), c Int64 STATISTIC(tdigest)) ENGINE = MergeTree() ORDER BY a; - """) + """ + ) node1.query("INSERT INTO test_stat VALUES (1,2,3), (4,5,6)") From 15fe392fed647ece1d18fea8d2a6b8197c1a9ae6 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Wed, 27 Sep 2023 10:13:23 +0000 Subject: [PATCH 0051/1097] no update network_interfaces Signed-off-by: Duc Canh Le --- src/Common/isLocalAddress.cpp | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/src/Common/isLocalAddress.cpp b/src/Common/isLocalAddress.cpp index 772e0363904..1e6cfb1c592 100644 --- a/src/Common/isLocalAddress.cpp +++ b/src/Common/isLocalAddress.cpp @@ -79,33 +79,6 @@ struct NetworkInterfaces : public boost::noncopyable { freeifaddrs(ifaddr); } - - static std::shared_ptr instance() - { - static constexpr int 
NET_INTERFACE_VALID_PERIOD_MS = 30000; - static std::shared_ptr nf = std::make_shared(); - static std::atomic last_updated_time = std::chrono::steady_clock::now(); - static std::shared_mutex nf_mtx; - - auto now = std::chrono::steady_clock::now(); - - if (std::chrono::duration_cast(now - last_updated_time.load()).count() > NET_INTERFACE_VALID_PERIOD_MS) - { - std::unique_lock lock(nf_mtx); - /// It's possible that last_updated_time after we get lock - if (std::chrono::duration_cast(now - last_updated_time.load()).count() > NET_INTERFACE_VALID_PERIOD_MS) - { - nf = std::make_shared(); - last_updated_time.store(now); - } - return nf; - } - else - { - std::shared_lock lock(nf_mtx); - return nf; - } - } }; } @@ -143,7 +116,8 @@ bool isLocalAddress(const Poco::Net::IPAddress & address) } } - return NetworkInterfaces::instance()->hasAddress(address); + static NetworkInterfaces network_interfaces; + return network_interfaces.hasAddress(address); } From 2ce527364dd6ee143882b02b2348b89eb4bfcb6c Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Wed, 27 Sep 2023 18:21:15 +0800 Subject: [PATCH 0052/1097] remove unnecessary headers --- src/Common/isLocalAddress.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Common/isLocalAddress.cpp b/src/Common/isLocalAddress.cpp index 1e6cfb1c592..86259eb1ba4 100644 --- a/src/Common/isLocalAddress.cpp +++ b/src/Common/isLocalAddress.cpp @@ -1,11 +1,7 @@ #include #include -#include #include -#include -#include -#include #include #include #include From 6ce77b07f72961c0f5b47c7a2113134c00db102d Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 27 Sep 2023 17:59:53 +0200 Subject: [PATCH 0053/1097] refine exception messages --- src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp | 2 +- src/Storages/Statistic/tests/gtest_stats.cpp | 4 ++-- src/Storages/StatisticsDescription.cpp | 4 ++-- .../00002_log_and_exception_messages_formatting.sql | 1 + tests/queries/0_stateless/02864_statistic_exception.sql | 7 +++++++ 5 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 59902a6a115..0cac051bb2c 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -275,7 +275,7 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree cond.selectivity = estimator.estimateSelectivity(node); if (node.getASTNode() != nullptr) - LOG_DEBUG(log, "Condition {} has selectivity {}", node.getASTNode()->dumpTree(), cond.selectivity); + LOG_TEST(log, "Condition {} has selectivity {}", node.getASTNode()->dumpTree(), cond.selectivity); } if (where_optimizer_context.move_primary_key_columns_to_end_of_prewhere) diff --git a/src/Storages/Statistic/tests/gtest_stats.cpp b/src/Storages/Statistic/tests/gtest_stats.cpp index b3070040c5c..52e60a7b030 100644 --- a/src/Storages/Statistic/tests/gtest_stats.cpp +++ b/src/Storages/Statistic/tests/gtest_stats.cpp @@ -20,8 +20,8 @@ TEST(Statistic, TDigestLessThan) DB::QuantileTDigest t_digest; - for (int i = 0; i < data1.size(); i++) - t_digest.add(data1[i]); + for (Int64 i : data1) + t_digest.add(i); t_digest.compress(); diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index 9fdfaf9b9b8..a427fb6a7cd 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -26,14 +26,14 @@ StatisticType stringToType(String type) { if (type == "tdigest") return TDigest; - throw 
Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type: {}", type); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type: {}. We only support statistic type `tdigest` right now.", type); } String StatisticDescription::getTypeName() const { if (type == TDigest) return "tdigest"; - throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type: {}", type); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type: {}. We only support statistic type `tdigest` right now.", type); } std::vector StatisticDescription::getStatisticsFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns) diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index f4ec9b79a4c..aa7603f8c35 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -46,6 +46,7 @@ create temporary table known_short_messages (s String) as select * from (select 'Attempt to read after eof', 'String size is too big ({}), maximum: {}', 'Processed: {}%', 'Creating {}: {}', 'Table {}.{} doesn''t exist', 'Invalid cache key hex: {}', 'User has been dropped', 'Illegal type {} of argument of function {}. Should be DateTime or DateTime64' +'Unknown statistic column: {}' ] as arr) array join arr; -- Check that we don't have too many short meaningless message patterns. diff --git a/tests/queries/0_stateless/02864_statistic_exception.sql b/tests/queries/0_stateless/02864_statistic_exception.sql index 32ac2021d29..95b94a9bca3 100644 --- a/tests/queries/0_stateless/02864_statistic_exception.sql +++ b/tests/queries/0_stateless/02864_statistic_exception.sql @@ -22,6 +22,12 @@ CREATE TABLE t1 b Int64, ) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } +CREATE TABLE t1 +( + a Float64 STATISTIC(xyz), + b Int64, +) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + CREATE TABLE t1 ( a Float64, @@ -29,6 +35,7 @@ CREATE TABLE t1 pk String, ) Engine = MergeTree() ORDER BY pk; +ALTER TABLE t1 ADD STATISTIC a TYPE xyz; -- { serverError INCORRECT_QUERY } ALTER TABLE t1 ADD STATISTIC a TYPE tdigest; ALTER TABLE t1 ADD STATISTIC a TYPE tdigest; -- { serverError ILLEGAL_STATISTIC } ALTER TABLE t1 ADD STATISTIC pk TYPE tdigest; -- { serverError ILLEGAL_STATISTIC } From 18f6fd1e5f8f676cc6d8850b28bdde35f3702289 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 27 Sep 2023 21:46:20 +0200 Subject: [PATCH 0054/1097] more comments --- src/Storages/Statistic/Estimator.h | 17 +++++++++++------ src/Storages/Statistic/Statistic.h | 3 ++- ...02_log_and_exception_messages_formatting.sql | 2 +- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/Storages/Statistic/Estimator.h b/src/Storages/Statistic/Estimator.h index a52351f4879..53ea46cbfd5 100644 --- a/src/Storages/Statistic/Estimator.h +++ b/src/Storages/Statistic/Estimator.h @@ -7,6 +7,7 @@ namespace DB class RPNBuilderTreeNode; +/// It estimates the selectivity of a condition. class ConditionEstimator { private: @@ -20,11 +21,13 @@ private: UInt64 total_count = 0; + /// Minimum estimator for values in a part. It can contains multiple types of statistics. 
+ /// But right now we only have tdigest; struct PartColumnEstimator { UInt64 part_count = 0; - std::shared_ptr t_digest; + std::shared_ptr tdigest; void merge(StatisticPtr statistic) { @@ -34,25 +37,27 @@ private: if (typeid_cast(statistic.get())) { - t_digest = std::static_pointer_cast(statistic); + tdigest = std::static_pointer_cast(statistic); } } Float64 estimateLess(Float64 val) const { - if (t_digest != nullptr) - return t_digest -> estimateLess(val); + if (tdigest != nullptr) + return tdigest -> estimateLess(val); return part_count * default_normal_cond_factor; } Float64 estimateGreator(Float64 val) const { - if (t_digest != nullptr) - return part_count - t_digest -> estimateLess(val); + if (tdigest != nullptr) + return part_count - tdigest -> estimateLess(val); return part_count * default_normal_cond_factor; } }; + /// An estimator for a column consists of several PartColumnEstimator. + /// We simply get selectivity for every part estimator and combine the result. struct ColumnEstimator { std::map estimators; diff --git a/src/Storages/Statistic/Statistic.h b/src/Storages/Statistic/Statistic.h index 0bb416b9feb..f474fffafa5 100644 --- a/src/Storages/Statistic/Statistic.h +++ b/src/Storages/Statistic/Statistic.h @@ -23,7 +23,7 @@ class IStatistic; using StatisticPtr = std::shared_ptr; using Statistics = std::vector; -/// Statistic for a column +/// Statistic contains the distribution of values in a column. /// right now we support /// - tdigest class IStatistic @@ -60,6 +60,7 @@ protected: }; +/// TDigestStatistic is a kind of histogram. class TDigestStatistic : public IStatistic { QuantileTDigest data; diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index aa7603f8c35..38626fc2928 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -45,7 +45,7 @@ create temporary table known_short_messages (s String) as select * from (select 'Unknown identifier: ''{}''', 'User name is empty', 'Expected function, got: {}', 'Attempt to read after eof', 'String size is too big ({}), maximum: {}', 'Processed: {}%', 'Creating {}: {}', 'Table {}.{} doesn''t exist', 'Invalid cache key hex: {}', -'User has been dropped', 'Illegal type {} of argument of function {}. Should be DateTime or DateTime64' +'User has been dropped', 'Illegal type {} of argument of function {}. 
Should be DateTime or DateTime64', 'Unknown statistic column: {}' ] as arr) array join arr; From d6c1c0e805f024a5f4c7e00ef4d65c6bedd2bfa0 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 28 Sep 2023 18:18:14 +0200 Subject: [PATCH 0055/1097] refine tests and support drop columns with statistic --- src/Storages/MergeTree/MutateTask.cpp | 24 ++++---- .../test_manipulate_statistic/test.py | 59 +++++++++++++------ 2 files changed, 56 insertions(+), 27 deletions(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index ebe34aa0702..ed6bf8f5c7e 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -675,8 +675,9 @@ static NameToNameVector collectFilesForRenames( } else if (command.type == MutationCommand::Type::DROP_STATISTIC) { - if (source_part->checksums.has(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX)) - add_rename(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX, ""); + for (const auto & statistic_column_name : command.statistic_columns) + if (source_part->checksums.has(STAT_FILE_PREFIX + statistic_column_name + STAT_FILE_SUFFIX)) + add_rename(STAT_FILE_PREFIX + statistic_column_name + STAT_FILE_SUFFIX, ""); } else if (isWidePart(source_part)) { @@ -696,6 +697,10 @@ static NameToNameVector collectFilesForRenames( if (auto serialization = source_part->tryGetSerialization(command.column_name)) serialization->enumerateStreams(callback); + + /// if we drop a column with statistic, we should also drop the stat file. + if (source_part->checksums.has(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX)) + add_rename(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX, ""); } else if (command.type == MutationCommand::Type::RENAME_COLUMN) { @@ -1377,7 +1382,7 @@ private: } } - Statistics stats; + Statistics stats_to_rewrite; const auto & columns = ctx->metadata_snapshot->getColumns(); for (const auto & col : columns) { @@ -1386,20 +1391,19 @@ private: if (ctx->materialized_statistics.contains(col.name)) { - stats.push_back(MergeTreeStatisticFactory::instance().get(*col.stat)); + stats_to_rewrite.push_back(MergeTreeStatisticFactory::instance().get(*col.stat)); } else { + /// We only hard-link statistics which + /// 1. not in `DROP STATISTIC` statement. It is filtered by `removed_stats` + /// 2. not in column list anymore, including `DROP COLUMN`. It is not touched by this loop. 
auto prefix = fmt::format("{}{}.", STAT_FILE_PREFIX, col.name); auto it = ctx->source_part->checksums.files.upper_bound(prefix); - while (it != ctx->source_part->checksums.files.end()) + if (it != ctx->source_part->checksums.files.end() && startsWith(it->first, prefix)) { - if (!startsWith(it->first, prefix)) - break; - entries_to_hardlink.insert(it->first); ctx->existing_indices_checksums.addFile(it->first, it->second.file_size, it->second.file_hash); - ++it; } } } @@ -1497,7 +1501,7 @@ private: ctx->metadata_snapshot, ctx->new_data_part->getColumns(), skip_indices, - stats, + stats_to_rewrite, ctx->compression_codec, ctx->txn, /*reset_columns=*/ true, diff --git a/tests/integration/test_manipulate_statistic/test.py b/tests/integration/test_manipulate_statistic/test.py index f3f059ce4a2..8bb4d19a23a 100644 --- a/tests/integration/test_manipulate_statistic/test.py +++ b/tests/integration/test_manipulate_statistic/test.py @@ -27,6 +27,9 @@ def check_stat_file_on_disk(node, table, part_name, column_name, exist): table, part_name ) ).strip() + + assert len(part_path) != 0 + output = node.exec_in_container( [ "bash", @@ -38,7 +41,7 @@ def check_stat_file_on_disk(node, table, part_name, column_name, exist): privileged=True, ) logging.debug( - f"stat file ls in {part_path} for column {column_name}, shows {output}" + f"Checking stat file in {part_path} for column {column_name}, got {output}" ) if exist: assert len(output) != 0 @@ -46,16 +49,7 @@ def check_stat_file_on_disk(node, table, part_name, column_name, exist): assert len(output) == 0 -def test_single_node(started_cluster): - node1.query("DROP TABLE IF EXISTS test_stat") - - node1.query( - """ - CREATE TABLE test_stat(a Int64 STATISTIC(tdigest), b Int64 STATISTIC(tdigest), c Int64 STATISTIC(tdigest)) - ENGINE = MergeTree() ORDER BY a; - """ - ) - +def run_test_single_node(started_cluster): node1.query("INSERT INTO test_stat VALUES (1,2,3), (4,5,6)") check_stat_file_on_disk(node1, "test_stat", "all_1_1_0", "a", True) @@ -76,12 +70,43 @@ def test_single_node(started_cluster): node1.query("ALTER TABLE test_stat MATERIALIZE STATISTIC b, c type tdigest") - check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_2", "a", False) - check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_2", "b", True) - check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_2", "c", True) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_4", "a", False) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_4", "b", True) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_4", "c", True) node1.query("ALTER TABLE test_stat ADD STATISTIC a type tdigest") + node1.query("ALTER TABLE test_stat MATERIALIZE STATISTIC a type tdigest") - check_stat_file_on_disk(node1, "test_stat", "all_1_1_0", "a", True) - check_stat_file_on_disk(node1, "test_stat", "all_1_1_0", "b", True) - check_stat_file_on_disk(node1, "test_stat", "all_1_1_0", "c", True) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_5", "a", True) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_5", "b", True) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_5", "c", True) + + node1.query("ALTER TABLE test_stat DROP COLUMN c") + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_6", "a", True) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_6", "b", True) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_6", "c", False) + + +def test_single_node_wide(started_cluster): + node1.query("DROP TABLE IF EXISTS test_stat") + + node1.query( + """ + CREATE TABLE 
test_stat(a Int64 STATISTIC(tdigest), b Int64 STATISTIC(tdigest), c Int64 STATISTIC(tdigest)) + ENGINE = MergeTree() ORDER BY a + SETTINGS min_bytes_for_wide_part = 0; + """ + ) + run_test_single_node(started_cluster) + + +def test_single_node_normal(started_cluster): + node1.query("DROP TABLE IF EXISTS test_stat") + + node1.query( + """ + CREATE TABLE test_stat(a Int64 STATISTIC(tdigest), b Int64 STATISTIC(tdigest), c Int64 STATISTIC(tdigest)) + ENGINE = MergeTree() ORDER BY a; + """ + ) + run_test_single_node(started_cluster) From a72541d93f1cf46aa0fb577bcb59e5f2845c7a81 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 29 Sep 2023 17:39:38 +0200 Subject: [PATCH 0056/1097] Improvement for big reads --- src/Common/ProfileEvents.cpp | 1 + src/Core/Settings.h | 1 + .../IO/CachedOnDiskReadBufferFromFile.cpp | 42 ++++++++++++------- src/Disks/IO/CachedOnDiskReadBufferFromFile.h | 6 ++- src/IO/ReadSettings.h | 1 + src/Interpreters/Cache/FileCache.cpp | 14 ++++++- src/Interpreters/Cache/FileCache.h | 4 +- src/Interpreters/Cache/FileCache_fwd.h | 2 +- src/Interpreters/Cache/FileSegment.cpp | 5 +-- src/Interpreters/Cache/FileSegment.h | 5 +-- src/Interpreters/Context.cpp | 1 + 11 files changed, 54 insertions(+), 28 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 0df0bc89b38..033335bd5ec 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -415,6 +415,7 @@ The server successfully detected this situation and will download merged part fr M(FilesystemCacheEvictMicroseconds, "Filesystem cache eviction time") \ M(FilesystemCacheGetOrSetMicroseconds, "Filesystem cache getOrSet() time") \ M(FilesystemCacheGetMicroseconds, "Filesystem cache get() time") \ + M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \ M(FileSegmentWaitMicroseconds, "Wait on DOWNLOADING state") \ M(FileSegmentCompleteMicroseconds, "Duration of FileSegment::complete() in filesystem cache") \ M(FileSegmentLockMicroseconds, "Lock file segment time") \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 1fada4ae569..416d8f481b1 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -717,6 +717,7 @@ class IColumn; M(Bool, skip_download_if_exceeds_query_cache, true, "Skip download from remote filesystem if exceeds query cache size", 0) \ M(UInt64, filesystem_cache_max_download_size, (128UL * 1024 * 1024 * 1024), "Max remote filesystem cache size that can be downloaded by a single query", 0) \ M(Bool, throw_on_error_from_cache_on_write_operations, false, "Ignore error from cache when caching on write operations (INSERT, merges)", 0) \ + M(UInt64, filesystem_cache_getorset_batch_size, 100, "A batch size for holding file segments for a single read range", 0) \ \ M(Bool, load_marks_asynchronously, false, "Load MergeTree marks asynchronously", 0) \ M(Bool, enable_filesystem_read_prefetches_log, false, "Log to system.filesystem prefetch_log during query. 
Should be used only for testing or debugging, not recommended to be turned on by default", 0) \ diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 1cfdd96b271..3c16d3d9ae2 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -114,30 +114,40 @@ void CachedOnDiskReadBufferFromFile::appendFilesystemCacheLog( cache_log->add(std::move(elem)); } -void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size) +bool CachedOnDiskReadBufferFromFile::nextFileSegmentsBatch() +{ + size_t size = getRemainingSizeToRead(); + if (!size) + return false; + + if (settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache) + { + file_segments = cache->get(cache_key, file_offset_of_buffer_end, size, settings.filesystem_cache_getorset_batch_size); + } + else + { + CreateFileSegmentSettings create_settings(FileSegmentKind::Regular); + file_segments = cache->getOrSet(cache_key, file_offset_of_buffer_end, size, file_size.value(), settings.filesystem_cache_getorset_batch_size, create_settings); + } + return !file_segments->empty(); +} + +void CachedOnDiskReadBufferFromFile::initialize() { if (initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "Caching buffer already initialized"); implementation_buffer.reset(); - if (settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache) - { - file_segments = cache->get(cache_key, offset, size); - } - else - { - CreateFileSegmentSettings create_settings(FileSegmentKind::Regular); - file_segments = cache->getOrSet(cache_key, offset, size, file_size.value(), create_settings); - } - /** * Segments in returned list are ordered in ascending order and represent a full contiguous * interval (no holes). Each segment in returned list has state: DOWNLOADED, DOWNLOADING or EMPTY. */ - if (file_segments->empty()) + if (!nextFileSegmentsBatch()) throw Exception(ErrorCodes::LOGICAL_ERROR, "List of file segments cannot be empty"); + chassert(!file_segments->empty()); + LOG_TEST( log, "Having {} file segments to read: {}, current offset: {}", @@ -512,7 +522,7 @@ bool CachedOnDiskReadBufferFromFile::completeFileSegmentAndGetNext() cache_file_reader.reset(); file_segments->popFront(); - if (file_segments->empty()) + if (file_segments->empty() && !nextFileSegmentsBatch()) return false; current_file_segment = &file_segments->front(); @@ -788,9 +798,9 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() return false; if (!initialized) - initialize(file_offset_of_buffer_end, getTotalSizeToRead()); + initialize(); - if (file_segments->empty()) + if (file_segments->empty() && !nextFileSegmentsBatch()) return false; const size_t original_buffer_size = internal_buffer.size(); @@ -1159,7 +1169,7 @@ off_t CachedOnDiskReadBufferFromFile::seek(off_t offset, int whence) return new_pos; } -size_t CachedOnDiskReadBufferFromFile::getTotalSizeToRead() +size_t CachedOnDiskReadBufferFromFile::getRemainingSizeToRead() { /// Last position should be guaranteed to be set, as at least we always know file size. 
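    /// Editor's illustrative note (an assumption; the rest of this function is not shown here):
    /// the remaining size is essentially read_until_position - file_offset_of_buffer_end, and
    /// nextFileSegmentsBatch() passes it to cache->get()/getOrSet() together with
    /// settings.filesystem_cache_getorset_batch_size, so a big read is served as several small
    /// batches of file segments instead of one large holder.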
if (!read_until_position) diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h index 0b9b01b8a94..f1eea66d41d 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h @@ -63,7 +63,7 @@ public: private: using ImplementationBufferPtr = std::shared_ptr; - void initialize(size_t offset, size_t size); + void initialize(); /** * Return a list of file segments ordered in ascending order. This list represents @@ -85,7 +85,7 @@ private: bool nextImplStep(); - size_t getTotalSizeToRead(); + size_t getRemainingSizeToRead(); bool completeFileSegmentAndGetNext(); @@ -95,6 +95,8 @@ private: static bool canStartFromCache(size_t current_offset, const FileSegment & file_segment); + bool nextFileSegmentsBatch(); + Poco::Logger * log; FileCache::Key cache_key; String source_file_path; diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index 87f249823b2..197ae563d25 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -100,6 +100,7 @@ struct ReadSettings bool enable_filesystem_cache_log = false; /// Don't populate cache when the read is not part of query execution (e.g. background thread). bool avoid_readthrough_cache_outside_query_context = true; + size_t filesystem_cache_getorset_batch_size = 100; size_t filesystem_cache_max_download_size = (128UL * 1024 * 1024 * 1024); bool skip_download_if_exceeds_query_cache = true; diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 3ed2c9c2dd6..bef1f3086df 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -400,6 +400,7 @@ FileCache::getOrSet( size_t offset, size_t size, size_t file_size, + size_t file_segments_limit, const CreateFileSegmentSettings & settings) { ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheGetOrSetMicroseconds); @@ -432,11 +433,17 @@ FileCache::getOrSet( while (!file_segments.empty() && file_segments.back()->range().left >= offset + size) file_segments.pop_back(); + if (file_segments_limit) + { + while (file_segments.size() > file_segments_limit) + file_segments.pop_back(); + } + chassert(!file_segments.empty()); return std::make_unique(std::move(file_segments)); } -FileSegmentsHolderPtr FileCache::get(const Key & key, size_t offset, size_t size) +FileSegmentsHolderPtr FileCache::get(const Key & key, size_t offset, size_t size, size_t file_segments_limit) { ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheGetMicroseconds); @@ -454,6 +461,11 @@ FileSegmentsHolderPtr FileCache::get(const Key & key, size_t offset, size_t size fillHolesWithEmptyFileSegments( *locked_key, file_segments, range, /* fill_with_detached */true, CreateFileSegmentSettings{}); + if (file_segments_limit) + { + while (file_segments.size() > file_segments_limit) + file_segments.pop_back(); + } return std::make_unique(std::move(file_segments)); } } diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index b5c2fa28f4b..1a1a25cd9c1 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -85,7 +85,7 @@ public: * it is guaranteed that these file segments are not removed from cache. 
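 * (Editor's illustrative note, not from the patch: with the new file_segments_limit argument the
 * returned holder is truncated to at most that many segments; passing 0 is assumed to mean
 * "no limit", matching the `if (file_segments_limit)` checks added in FileCache.cpp above.)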
*/ FileSegmentsHolderPtr - getOrSet(const Key & key, size_t offset, size_t size, size_t file_size, const CreateFileSegmentSettings & settings); + getOrSet(const Key & key, size_t offset, size_t size, size_t file_size, size_t file_segments_limit, const CreateFileSegmentSettings & settings); /** * Segments in returned list are ordered in ascending order and represent a full contiguous @@ -96,7 +96,7 @@ public: * with the destruction of the holder, while in getOrSet() EMPTY file segments can eventually change * it's state (and become DOWNLOADED). */ - FileSegmentsHolderPtr get(const Key & key, size_t offset, size_t size); + FileSegmentsHolderPtr get(const Key & key, size_t offset, size_t size, size_t file_segments_limit); FileSegmentsHolderPtr set(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings); diff --git a/src/Interpreters/Cache/FileCache_fwd.h b/src/Interpreters/Cache/FileCache_fwd.h index 3e7150ad253..1f61617668e 100644 --- a/src/Interpreters/Cache/FileCache_fwd.h +++ b/src/Interpreters/Cache/FileCache_fwd.h @@ -4,7 +4,7 @@ namespace DB { -static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 32 * 1024 * 1024; /// 32Mi +static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 16 * 1024 * 1024; /// 16Mi static constexpr int FILECACHE_DEFAULT_FILE_SEGMENT_ALIGNMENT = 4 * 1024 * 1024; /// 4Mi static constexpr int FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_THREADS = 2; static constexpr int FILECACHE_DEFAULT_LOAD_METADATA_THREADS = 1; diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index bb3216cb20e..a351df33b4b 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -23,6 +23,7 @@ namespace ProfileEvents extern const Event FileSegmentWriteMicroseconds; extern const Event FileSegmentUseMicroseconds; extern const Event FileSegmentHolderCompleteMicroseconds; + extern const Event FilesystemCacheUnusedHoldFileSegments; } namespace DB @@ -916,11 +917,9 @@ FileSegments::iterator FileSegmentsHolder::completeAndPopFrontImpl() FileSegmentsHolder::~FileSegmentsHolder() { + ProfileEvents::increment(ProfileEvents::FilesystemCacheUnusedHoldFileSegments, file_segments.size()); ProfileEventTimeIncrement watch(ProfileEvents::FileSegmentHolderCompleteMicroseconds); - if (!complete_on_dtor) - return; - for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end();) file_segment_it = completeAndPopFrontImpl(); } diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 8948b67fe2a..7c145664fe0 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -320,8 +320,8 @@ struct FileSegmentsHolder : private boost::noncopyable { FileSegmentsHolder() = default; - explicit FileSegmentsHolder(FileSegments && file_segments_, bool complete_on_dtor_ = true) - : file_segments(std::move(file_segments_)), complete_on_dtor(complete_on_dtor_) {} + explicit FileSegmentsHolder(FileSegments && file_segments_) + : file_segments(std::move(file_segments_)) {} ~FileSegmentsHolder(); @@ -351,7 +351,6 @@ struct FileSegmentsHolder : private boost::noncopyable private: FileSegments file_segments{}; - const bool complete_on_dtor = true; FileSegments::iterator completeAndPopFrontImpl(); }; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 58d60c640e7..807215412c2 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -4591,6 +4591,7 @@ ReadSettings 
Context::getReadSettings() const res.enable_filesystem_cache = settings.enable_filesystem_cache; res.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache; res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log; + res.filesystem_cache_getorset_batch_size = settings.filesystem_cache_getorset_batch_size; res.filesystem_cache_max_download_size = settings.filesystem_cache_max_download_size; res.skip_download_if_exceeds_query_cache = settings.skip_download_if_exceeds_query_cache; From ba878fb43a6f830361800e8496e16625230031b7 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 4 Oct 2023 00:58:26 +0200 Subject: [PATCH 0057/1097] support rename --- src/Storages/AlterCommands.cpp | 2 + .../MergeTreeDataPartWriterOnDisk.cpp | 2 +- .../MergeTree/MergeTreeWhereOptimizer.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 44 +++++++++++++------ src/Storages/Statistic/Statistic.cpp | 13 +++++- src/Storages/Statistic/Statistic.h | 16 +------ .../test_manipulate_statistic/test.py | 12 +++++ .../02864_statistic_operate.reference | 6 +++ .../0_stateless/02864_statistic_operate.sql | 7 +++ 9 files changed, 74 insertions(+), 30 deletions(-) diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index febeab5ad93..6e9e034c2a9 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -727,6 +727,8 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) rename_visitor.visit(column_to_modify.default_desc.expression); if (column_to_modify.ttl) rename_visitor.visit(column_to_modify.ttl); + if (column_to_modify.name == column_name && column_to_modify.stat) + column_to_modify.stat->column_name = rename_to; }); } if (metadata.table_ttl.definition_ast) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index a5a86256c9d..6e544b4a35a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -331,7 +331,7 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block { for (const auto & stat_ptr : stats) { - stat_ptr->update(block); + stat_ptr->update(block.getByName(stat_ptr->columnName()).column); } } diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 0cac051bb2c..2322fcc9867 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -275,7 +275,7 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree cond.selectivity = estimator.estimateSelectivity(node); if (node.getASTNode() != nullptr) - LOG_TEST(log, "Condition {} has selectivity {}", node.getASTNode()->dumpTree(), cond.selectivity); + LOG_TRACE(log, "Condition {} has selectivity {}", node.getASTNode()->dumpTree(), cond.selectivity); } if (where_optimizer_context.move_primary_key_columns_to_end_of_prewhere) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index ed6bf8f5c7e..58f73033a3b 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -733,6 +733,10 @@ static NameToNameVector collectFilesForRenames( if (auto serialization = source_part->tryGetSerialization(command.column_name)) serialization->enumerateStreams(callback); + + /// if we rename a column with statistic, we should 
also rename the stat file. + if (source_part->checksums.has(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX)) + add_rename(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX, STAT_FILE_PREFIX + command.rename_to + STAT_FILE_SUFFIX); } else if (command.type == MutationCommand::Type::READ_COLUMN) { @@ -919,7 +923,7 @@ struct MutationContext std::set indices_to_recalc; std::set stats_to_recalc; std::set projections_to_recalc; - MergeTreeData::DataPart::Checksums existing_indices_checksums; + MergeTreeData::DataPart::Checksums existing_indices_stats_checksums; NameSet files_to_skip; NameToNameVector files_to_rename; @@ -1340,6 +1344,8 @@ private: NameSet removed_indices; NameSet removed_stats; + /// A stat file need to be renamed iff the column is renamed. + NameToNameMap renamed_stats; for (const auto & command : ctx->for_file_renames) { if (command.type == MutationCommand::DROP_INDEX) @@ -1347,6 +1353,9 @@ private: else if (command.type == MutationCommand::DROP_STATISTIC) for (const auto & column_name : command.statistic_columns) removed_stats.insert(column_name); + else if (command.type == MutationCommand::RENAME_COLUMN + && ctx->source_part->checksums.files.contains(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX)) + renamed_stats[STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX] = STAT_FILE_PREFIX + command.rename_to + STAT_FILE_SUFFIX; } bool is_full_part_storage = isFullPartStorage(ctx->new_data_part->getDataPartStorage()); @@ -1376,7 +1385,7 @@ private: break; entries_to_hardlink.insert(it->first); - ctx->existing_indices_checksums.addFile(it->first, it->second.file_size, it->second.file_hash); + ctx->existing_indices_stats_checksums.addFile(it->first, it->second.file_size, it->second.file_hash); ++it; } } @@ -1395,15 +1404,15 @@ private: } else { - /// We only hard-link statistics which - /// 1. not in `DROP STATISTIC` statement. It is filtered by `removed_stats` - /// 2. not in column list anymore, including `DROP COLUMN`. It is not touched by this loop. - auto prefix = fmt::format("{}{}.", STAT_FILE_PREFIX, col.name); - auto it = ctx->source_part->checksums.files.upper_bound(prefix); - if (it != ctx->source_part->checksums.files.end() && startsWith(it->first, prefix)) + /// We do not hard-link statistics which + /// 1. In `DROP STATISTIC` statement. It is filtered by `removed_stats` + /// 2. Not in column list anymore, including `DROP COLUMN`. It is not touched by this loop. + String stat_file_name = STAT_FILE_PREFIX + col.name + STAT_FILE_SUFFIX; + auto it = ctx->source_part->checksums.files.find(stat_file_name); + if (it != ctx->source_part->checksums.files.end()) { entries_to_hardlink.insert(it->first); - ctx->existing_indices_checksums.addFile(it->first, it->second.file_size, it->second.file_hash); + ctx->existing_indices_stats_checksums.addFile(it->first, it->second.file_size, it->second.file_hash); } } } @@ -1441,9 +1450,18 @@ private: for (auto it = ctx->source_part->getDataPartStorage().iterate(); it->isValid(); it->next()) { if (!entries_to_hardlink.contains(it->name())) - continue; - - if (it->isFile()) + { + if (renamed_stats.contains(it->name())) + { + ctx->new_data_part->getDataPartStorage().createHardLinkFrom( + ctx->source_part->getDataPartStorage(), it->name(), renamed_stats.at(it->name())); + hardlinked_files.insert(it->name()); + /// Also we need to "rename" checksums to finalize correctly. 
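                /// Editor's illustrative example (assuming the "statistic_<column>.stat" naming used above):
                /// after ALTER TABLE ... RENAME COLUMN b TO c, the source part's "statistic_b.stat" is
                /// hard-linked as "statistic_c.stat" and its checksum is re-registered under the new name below.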
+ const auto & check_sum = ctx->source_part->checksums.files.at(it->name()); + ctx->existing_indices_stats_checksums.addFile(renamed_stats.at(it->name()), check_sum.file_size, check_sum.file_hash); + } + } + else if (it->isFile()) { ctx->new_data_part->getDataPartStorage().createHardLinkFrom( ctx->source_part->getDataPartStorage(), it->name(), it->name()); @@ -1526,7 +1544,7 @@ private: ctx->mutating_pipeline.reset(); static_pointer_cast(ctx->out)->finalizePart( - ctx->new_data_part, ctx->need_sync, nullptr, &ctx->existing_indices_checksums); + ctx->new_data_part, ctx->need_sync, nullptr, &ctx->existing_indices_stats_checksums); ctx->out.reset(); } diff --git a/src/Storages/Statistic/Statistic.cpp b/src/Storages/Statistic/Statistic.cpp index 311172bffc0..d3edcf73422 100644 --- a/src/Storages/Statistic/Statistic.cpp +++ b/src/Storages/Statistic/Statistic.cpp @@ -17,9 +17,20 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTIC; } +void TDigestStatistic::update(const ColumnPtr & column) +{ + size_t size = column->size(); + + for (size_t i = 0; i < size; ++i) + { + /// TODO: support more types. + Float64 value = column->getFloat64(i); + data.add(value, 1); + } +} + StatisticPtr TDigestCreator(const StatisticDescription & stat) { - /// TODO: check column data types. return StatisticPtr(new TDigestStatistic(stat)); } diff --git a/src/Storages/Statistic/Statistic.h b/src/Storages/Statistic/Statistic.h index f474fffafa5..0fa8ff8ff3d 100644 --- a/src/Storages/Statistic/Statistic.h +++ b/src/Storages/Statistic/Statistic.h @@ -35,7 +35,6 @@ public: } virtual ~IStatistic() = default; - /// statistic_[col_name]_[type] String getFileName() const { return STAT_FILE_PREFIX + columnName(); @@ -50,7 +49,7 @@ public: virtual void deserialize(ReadBuffer & buf) = 0; - virtual void update(const Block & block) = 0; + virtual void update(const ColumnPtr & column) = 0; virtual UInt64 count() = 0; @@ -84,18 +83,7 @@ public: data.deserialize(buf); } - void update(const Block & block) override - { - const auto & column_with_type = block.getByName(columnName()); - size_t size = block.rows(); - - for (size_t i = 0; i < size; ++i) - { - /// TODO: support more types. 
- Float64 value = column_with_type.column->getFloat64(i); - data.add(value, 1); - } - } + void update(const ColumnPtr & column) override; UInt64 count() override { diff --git a/tests/integration/test_manipulate_statistic/test.py b/tests/integration/test_manipulate_statistic/test.py index 8bb4d19a23a..f1c00a61b07 100644 --- a/tests/integration/test_manipulate_statistic/test.py +++ b/tests/integration/test_manipulate_statistic/test.py @@ -86,6 +86,18 @@ def run_test_single_node(started_cluster): check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_6", "b", True) check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_6", "c", False) + node1.query("ALTER TABLE test_stat RENAME COLUMN b TO c") + + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_7", "a", True) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_7", "b", False) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_7", "c", True) + + node1.query("ALTER TABLE test_stat RENAME COLUMN c TO b") + + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_8", "a", True) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_8", "b", True) + check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_8", "c", False) + def test_single_node_wide(started_cluster): node1.query("DROP TABLE IF EXISTS test_stat") diff --git a/tests/queries/0_stateless/02864_statistic_operate.reference b/tests/queries/0_stateless/02864_statistic_operate.reference index 2726064be00..7fad7c810c1 100644 --- a/tests/queries/0_stateless/02864_statistic_operate.reference +++ b/tests/queries/0_stateless/02864_statistic_operate.reference @@ -23,3 +23,9 @@ SELECT count() FROM t1 PREWHERE (a < 10) AND (b < 10) 20 +CREATE TABLE default.t1\n(\n `a` Float64 STATISTIC(tdigest),\n `c` Int64 STATISTIC(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After rename +SELECT count() +FROM t1 +PREWHERE (a < 10) AND (c < 10) +20 diff --git a/tests/queries/0_stateless/02864_statistic_operate.sql b/tests/queries/0_stateless/02864_statistic_operate.sql index e0a13afab84..29bd213f04a 100644 --- a/tests/queries/0_stateless/02864_statistic_operate.sql +++ b/tests/queries/0_stateless/02864_statistic_operate.sql @@ -47,4 +47,11 @@ SELECT 'After merge'; EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE b < 10 and a < 10; SELECT count(*) FROM t1 WHERE b < 10 and a < 10; +ALTER TABLE t1 RENAME COLUMN b TO c; +SHOW CREATE TABLE t1; + +SELECT 'After rename'; +EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE c < 10 and a < 10; +SELECT count(*) FROM t1 WHERE c < 10 and a < 10; + DROP TABLE IF EXISTS t1; From 020d76a383b3a489a3868c3a7c1d770cfaba5549 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Wed, 24 May 2023 21:59:46 +0000 Subject: [PATCH 0058/1097] merge_row_policy: initial --- src/Interpreters/InterpreterSelectQuery.cpp | 14 ++++++++- src/Storages/StorageMerge.cpp | 33 ++++++++++++++++++++- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index e1faa8c8958..6d8028f628b 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -93,6 +93,7 @@ #include #include +#include namespace ProfileEvents { @@ -546,7 +547,18 @@ InterpreterSelectQuery::InterpreterSelectQuery( std::shared_ptr table_join = joined_tables.makeTableJoin(query); if (storage) + { + + LOG_TRACE(&Poco::Logger::get("InterpretSelectQuery ctor"), "table name: {}", table_id.getTableName()); + 
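            /// Editor's illustrative note (general ClickHouse behaviour, not introduced by this patch):
            /// getRowPolicyFilter() returns the combined SELECT row policy expression for the current
            /// user on (database, table), or a null pointer when no policy applies; it is later turned
            /// into a filter over the columns read from the table.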
row_policy_filter = context->getRowPolicyFilter(table_id.getDatabaseName(), table_id.getTableName(), RowPolicyFilterType::SELECT_FILTER); + } + else + { + LOG_TRACE(&Poco::Logger::get("InterpretSelectQuery ctor"), "no storage"); + } + + StorageView * view = nullptr; if (storage) @@ -832,7 +844,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (query.prewhere() && !query.where()) analysis_result.prewhere_info->need_filter = true; - if (table_id && got_storage_from_query && !joined_tables.isLeftTableFunction()) + if (table_id && got_storage_from_query /* && !joined_tables.isLeftTableFunction() */) { /// The current user should have the SELECT privilege. If this table_id is for a table /// function we don't check access rights here because in this case they have been already diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index c14abfc9ab2..e917553d704 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -43,6 +43,8 @@ #include #include +#include + namespace { @@ -515,6 +517,8 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu } } + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::initializePipeline"), "table name: {}", storage->getStorageID().getTableName()); + auto source_pipeline = createSources( nested_storage_snaphsot, modified_query_info, @@ -574,6 +578,8 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const SelectQueryInfo & quer modified_query_info.table_expression = replacement_table_expression; modified_query_info.planner_context->getOrCreateTableExpressionData(replacement_table_expression); + + auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); if (storage_snapshot->storage.supportsSubcolumns()) get_column_options.withSubcolumns(); @@ -594,6 +600,10 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const SelectQueryInfo & quer } modified_query_info.query = queryNodeToSelectQuery(modified_query_info.query_tree); + TreeRewriterResult new_analyzer_res = *modified_query_info.syntax_analyzer_result; + new_analyzer_res.has_explicit_columns = false; + + modified_query_info.syntax_analyzer_result = std::make_shared(std::move(new_analyzer_res)); } else { @@ -656,8 +666,9 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( QueryPlan & plan = child_plans.emplace_back(); StorageView * view = dynamic_cast(storage.get()); - if (!view || allow_experimental_analyzer) + if (/* !view || */ allow_experimental_analyzer) { + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "direct storage->read"); storage->read(plan, real_column_names, storage_snapshot, @@ -667,6 +678,24 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( max_block_size, UInt32(streams_num)); } + else if (!view) + { + /// For view storage, we need to rewrite the `modified_query_info.view_query` to optimize read. + /// The most intuitive way is to use InterpreterSelectQuery. 
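            /// Editor's illustrative note (an inference about intent, not text from the patch): the
            /// comment above is copied from the view branch; for plain tables this path exists so that
            /// the nested InterpreterSelectQuery applies the inner table's row policy, which a direct
            /// storage->read() call would not do.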
+ + /// Intercept the settings + modified_context->setSetting("max_threads", streams_num); + modified_context->setSetting("max_streams_to_max_threads_ratio", 1); + modified_context->setSetting("max_block_size", max_block_size); + + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "creating InterpreterSelectQuery 1.0"); + InterpreterSelectQuery interpreter(modified_query_info.query, + modified_context, + storage, + storage->getInMemoryMetadataPtr(), // view->getInMemoryMetadataPtr(), + SelectQueryOptions(/* processed_stage*/)); + interpreter.buildQueryPlan(plan); + } else { /// For view storage, we need to rewrite the `modified_query_info.view_query` to optimize read. @@ -677,6 +706,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( modified_context->setSetting("max_streams_to_max_threads_ratio", 1); modified_context->setSetting("max_block_size", max_block_size); + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "creating InterpreterSelectQuery 1.5"); InterpreterSelectQuery interpreter(modified_query_info.query, modified_context, storage, @@ -719,6 +749,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( { modified_select.replaceDatabaseAndTable(database_name, table_name); /// TODO: Find a way to support projections for StorageMerge + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "creating InterpreterSelectQuery 2"); InterpreterSelectQuery interpreter{modified_query_info.query, modified_context, SelectQueryOptions(processed_stage).ignoreProjections()}; From 1e318599360840e0a581cba4ec064f3a907d7746 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Thu, 25 May 2023 10:51:45 +0000 Subject: [PATCH 0059/1097] merge_row_policy: tiny cleanup --- src/Interpreters/InterpreterSelectQuery.cpp | 4 +--- src/Storages/StorageMerge.cpp | 7 ------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 6d8028f628b..2b010228c9a 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -548,9 +548,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (storage) { - LOG_TRACE(&Poco::Logger::get("InterpretSelectQuery ctor"), "table name: {}", table_id.getTableName()); - row_policy_filter = context->getRowPolicyFilter(table_id.getDatabaseName(), table_id.getTableName(), RowPolicyFilterType::SELECT_FILTER); } else @@ -844,7 +842,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (query.prewhere() && !query.where()) analysis_result.prewhere_info->need_filter = true; - if (table_id && got_storage_from_query /* && !joined_tables.isLeftTableFunction() */) + if (table_id && got_storage_from_query && !joined_tables.isLeftTableFunction()) { /// The current user should have the SELECT privilege. 
If this table_id is for a table /// function we don't check access rights here because in this case they have been already diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index e917553d704..19792aeca9f 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -45,7 +45,6 @@ #include - namespace { @@ -578,8 +577,6 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const SelectQueryInfo & quer modified_query_info.table_expression = replacement_table_expression; modified_query_info.planner_context->getOrCreateTableExpressionData(replacement_table_expression); - - auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); if (storage_snapshot->storage.supportsSubcolumns()) get_column_options.withSubcolumns(); @@ -600,10 +597,6 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const SelectQueryInfo & quer } modified_query_info.query = queryNodeToSelectQuery(modified_query_info.query_tree); - TreeRewriterResult new_analyzer_res = *modified_query_info.syntax_analyzer_result; - new_analyzer_res.has_explicit_columns = false; - - modified_query_info.syntax_analyzer_result = std::make_shared(std::move(new_analyzer_res)); } else { From 0464b4fd061b806c2e79f58d114423f7244dbd70 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Sat, 27 May 2023 22:23:19 +0000 Subject: [PATCH 0060/1097] merge_row_policy: with QueryProcessingStage::Complete --- src/Interpreters/InterpreterSelectQuery.cpp | 9 +++-- src/Storages/StorageMerge.cpp | 38 +++++++++++++++++++-- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 2b010228c9a..d2d13bacac8 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -100,6 +100,7 @@ namespace ProfileEvents extern const Event SelectQueriesWithSubqueries; extern const Event QueriesWithSubqueries; } +#pragma GCC diagnostic ignored "-Wold-style-cast" namespace DB { @@ -548,12 +549,12 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (storage) { - LOG_TRACE(&Poco::Logger::get("InterpretSelectQuery ctor"), "table name: {}", table_id.getTableName()); + LOG_TRACE(&Poco::Logger::get("InterpretSelectQuery ctor"), " {}, table name: {}", (void*)this, table_id.getTableName()); row_policy_filter = context->getRowPolicyFilter(table_id.getDatabaseName(), table_id.getTableName(), RowPolicyFilterType::SELECT_FILTER); } else { - LOG_TRACE(&Poco::Logger::get("InterpretSelectQuery ctor"), "no storage"); + LOG_TRACE(&Poco::Logger::get("InterpretSelectQuery ctor"), " {}, no storage", (void*)this); } @@ -1451,7 +1452,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

{}", QueryProcessingStage::toString(from_stage), QueryProcessingStage::toString(options.to_stage)); + LOG_TRACE(log, "executeImpl {}, {} -> {}", (void*) this, QueryProcessingStage::toString(from_stage), QueryProcessingStage::toString(options.to_stage)); } if (query_info.projection && query_info.projection->input_order_info && query_info.input_order_info) @@ -1523,6 +1524,8 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

column_name, expressions.filter_info->do_remove_column); + LOG_TRACE(log, "executeImpl, adding Row-level security filter"); + row_level_security_step->setStepDescription("Row-level security filter"); query_plan.addStep(std::move(row_level_security_step)); } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 19792aeca9f..6be1ce7db3f 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -45,6 +45,10 @@ #include +#pragma GCC diagnostic ignored "-Wunused-parameter" +#pragma GCC diagnostic ignored "-Wunused-but-set-variable" + + namespace { @@ -264,6 +268,8 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage( /// (see removeJoin()) /// /// And for this we need to return FetchColumns. + LOG_TRACE(&Poco::Logger::get("StorageMerge::getQueryProcessingStage"), "to_stage {}", to_stage); + if (const auto * select = query_info.query->as(); select && hasJoin(*select)) return QueryProcessingStage::FetchColumns; @@ -287,13 +293,15 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage( stage_in_source_tables, table->getQueryProcessingStage(local_context, to_stage, table->getStorageSnapshot(table->getInMemoryMetadataPtr(), local_context), query_info)); + LOG_TRACE(&Poco::Logger::get("StorageMerge::getQueryProcessingStage"), "stage_in_source_tables {}", stage_in_source_tables); } iterator->next(); } } - return selected_table_size == 1 ? stage_in_source_tables : std::min(stage_in_source_tables, QueryProcessingStage::WithMergeableState); + // return selected_table_size == 1 ? stage_in_source_tables : std::min(stage_in_source_tables, QueryProcessingStage::WithMergeableState); + return QueryProcessingStage::Complete; } void StorageMerge::read( @@ -312,6 +320,9 @@ void StorageMerge::read( auto modified_context = Context::createCopy(local_context); modified_context->setSetting("optimize_move_to_prewhere", false); + LOG_TRACE(&Poco::Logger::get("StorageMerge::read"), "processed_stage {}", QueryProcessingStage::toString(processed_stage)); + + bool has_database_virtual_column = false; bool has_table_virtual_column = false; Names real_column_names; @@ -324,7 +335,10 @@ void StorageMerge::read( else if (column_name == "_table" && isVirtualColumn(column_name, storage_snapshot->metadata)) has_table_virtual_column = true; else + { real_column_names.push_back(column_name); + LOG_TRACE(&Poco::Logger::get("StorageMerge::read"), "column_name {}", column_name); + } } StorageListWithLocks selected_tables @@ -353,7 +367,7 @@ void StorageMerge::read( query_plan.addInterpreterContext(modified_context); /// What will be result structure depending on query processed stage in source tables? 
- Block common_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, processed_stage); + Block common_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, QueryProcessingStage::Complete /* processed_stage */); auto step = std::make_unique( common_header, @@ -477,6 +491,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu bool with_aliases = common_processed_stage == QueryProcessingStage::FetchColumns && !storage_columns.getAliases().empty(); if (with_aliases) { + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::initializePipeline"), "with_aliases"); ASTPtr required_columns_expr_list = std::make_shared(); ASTPtr column_expr; @@ -650,6 +665,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( storage_snapshot, modified_query_info); +#pragma GCC diagnostic ignored "-Wunreachable-code" if (processed_stage <= storage_stage || (allow_experimental_analyzer && processed_stage == QueryProcessingStage::FetchColumns)) { /// If there are only virtual columns in query, you must request at least one other column. @@ -660,6 +676,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( StorageView * view = dynamic_cast(storage.get()); if (/* !view || */ allow_experimental_analyzer) + // if (!view || allow_experimental_analyzer) { LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "direct storage->read"); storage->read(plan, @@ -687,6 +704,8 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( storage, storage->getInMemoryMetadataPtr(), // view->getInMemoryMetadataPtr(), SelectQueryOptions(/* processed_stage*/)); + // SelectQueryOptions(processed_stage)); + // SelectQueryOptions(QueryProcessingStage::WithMergeableState)); interpreter.buildQueryPlan(plan); } else @@ -807,6 +826,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( /// Subordinary tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. 
+ convertingSourceStream(header, storage_snapshot->metadata, aliases, modified_context, *builder, processed_stage); } @@ -1018,6 +1038,20 @@ void ReadFromMerge::convertingSourceStream( if (local_context->getSettingsRef().allow_experimental_analyzer && processed_stage != QueryProcessingStage::FetchColumns) convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position; + + for (const auto & column_with_type_and_name : builder.getHeader().getColumnsWithTypeAndName()) + { + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "column name: {} (builder.getHeader().getColumnsWithTypeAndName())", column_with_type_and_name.name); + } + + for (const auto & column_with_type_and_name : header.getColumnsWithTypeAndName()) + { + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "column name: {} (header.getColumnsWithTypeAndName())", column_with_type_and_name.name); + } + + + + auto convert_actions_dag = ActionsDAG::makeConvertingActions(builder.getHeader().getColumnsWithTypeAndName(), header.getColumnsWithTypeAndName(), convert_actions_match_columns_mode); From 2acc4c223d9644da82d100163875de413bcea730 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Mon, 29 May 2023 11:15:42 +0000 Subject: [PATCH 0061/1097] merge_row_policy - extra debug --- src/Interpreters/InterpreterSelectQuery.cpp | 2 ++ src/Storages/StorageMerge.cpp | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d2d13bacac8..54b7b2a3137 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -861,6 +861,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Add prewhere actions with alias columns and record needed columns from storage. 
if (storage) { + LOG_TRACE(log, "calling addPrewhereAliasActions"); + addPrewhereAliasActions(); analysis_result.required_columns = required_columns; } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 6be1ce7db3f..098d0992738 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -732,9 +732,13 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( if (auto * read_from_merge_tree = typeid_cast(plan.getRootNode()->step.get())) { + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "ReadFromMergeTree detected"); size_t filters_dags_size = filter_dags.size(); for (size_t i = 0; i < filters_dags_size; ++i) + { + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "adding filter"); read_from_merge_tree->addFilter(filter_dags[i], filter_nodes.nodes[i]); + } } builder = plan.buildQueryPipeline( From eebdff472e73bb64d04b82edea3cb98cd176291d Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Wed, 31 May 2023 11:48:20 +0000 Subject: [PATCH 0062/1097] merge_row_policy: original behavior restored + extra debug --- src/Interpreters/InterpreterSelectQuery.cpp | 10 +++++++++- src/Storages/StorageMerge.cpp | 13 +++++++------ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 54b7b2a3137..499dedf2ff5 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -134,6 +134,9 @@ FilterDAGInfoPtr generateFilterActions( Names & prerequisite_columns, PreparedSetsPtr prepared_sets) { + LOG_TRACE(&Poco::Logger::get("generateFilterActions"), "top of"); + + auto filter_info = std::make_shared(); const auto & db_name = table_id.getDatabaseName(); @@ -549,7 +552,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (storage) { - LOG_TRACE(&Poco::Logger::get("InterpretSelectQuery ctor"), " {}, table name: {}", (void*)this, table_id.getTableName()); + LOG_TRACE(&Poco::Logger::get("InterpretSelectQuery ctor"), " {}, table name: {}, calling getRowPolicyFilter", (void*)this, table_id.getTableName()); row_policy_filter = context->getRowPolicyFilter(table_id.getDatabaseName(), table_id.getTableName(), RowPolicyFilterType::SELECT_FILTER); } else @@ -2065,11 +2068,15 @@ void InterpreterSelectQuery::addPrewhereAliasActions() auto & expressions = analysis_result; if (expressions.filter_info) { + LOG_TRACE(&Poco::Logger::get("addPrewhereAliasActions"), " {}, expressions.filter_info", (void*)this); + if (!expressions.prewhere_info) { + LOG_TRACE(&Poco::Logger::get("addPrewhereAliasActions"), " {}, expressions.filter_info 1", (void*)this); const bool does_storage_support_prewhere = !input_pipe && storage && storage->supportsPrewhere(); if (does_storage_support_prewhere && shouldMoveToPrewhere()) { + LOG_TRACE(&Poco::Logger::get("addPrewhereAliasActions"), " {}, expressions.filter_info 1.5", (void*)this); /// Execute row level filter in prewhere as a part of "move to prewhere" optimization. expressions.prewhere_info = std::make_shared( std::move(expressions.filter_info->actions), @@ -2082,6 +2089,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions() } else { + LOG_TRACE(&Poco::Logger::get("addPrewhereAliasActions"), " {}, expressions.filter_info 2", (void*)this); /// Add row level security actions to prewhere. 
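            /// Editor's illustrative note (a summary of existing behaviour, not changed by this patch):
            /// when a PREWHERE is already present, the row policy expression is attached as
            /// prewhere_info->row_level_filter instead of a separate filter step, so it is evaluated
            /// before the user's PREWHERE condition while reading the part.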
expressions.prewhere_info->row_level_filter = std::move(expressions.filter_info->actions); expressions.prewhere_info->row_level_column_name = std::move(expressions.filter_info->column_name); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 098d0992738..c0a0ffec7fd 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -300,8 +300,8 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage( } } - // return selected_table_size == 1 ? stage_in_source_tables : std::min(stage_in_source_tables, QueryProcessingStage::WithMergeableState); - return QueryProcessingStage::Complete; + return selected_table_size == 1 ? stage_in_source_tables : std::min(stage_in_source_tables, QueryProcessingStage::WithMergeableState); + // return QueryProcessingStage::Complete; } void StorageMerge::read( @@ -367,7 +367,8 @@ void StorageMerge::read( query_plan.addInterpreterContext(modified_context); /// What will be result structure depending on query processed stage in source tables? - Block common_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, QueryProcessingStage::Complete /* processed_stage */); + // Block common_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, QueryProcessingStage::Complete /* processed_stage */); + Block common_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, processed_stage ); auto step = std::make_unique( common_header, @@ -703,8 +704,8 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( modified_context, storage, storage->getInMemoryMetadataPtr(), // view->getInMemoryMetadataPtr(), - SelectQueryOptions(/* processed_stage*/)); - // SelectQueryOptions(processed_stage)); + // SelectQueryOptions(/* processed_stage*/)); + SelectQueryOptions(processed_stage)); // SelectQueryOptions(QueryProcessingStage::WithMergeableState)); interpreter.buildQueryPlan(plan); } @@ -732,8 +733,8 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( if (auto * read_from_merge_tree = typeid_cast(plan.getRootNode()->step.get())) { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "ReadFromMergeTree detected"); size_t filters_dags_size = filter_dags.size(); + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "ReadFromMergeTree detected, DAG size {}", filters_dags_size); for (size_t i = 0; i < filters_dags_size; ++i) { LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "adding filter"); From b6f682dc3fe9d9f6ca541d6c06a96711b50b7342 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Fri, 2 Jun 2023 23:06:10 +0000 Subject: [PATCH 0063/1097] merge_row_policy: FilterTransform --- src/Interpreters/InterpreterSelectQuery.cpp | 21 +++- .../MergeTree/MergeTreeSequentialSource.cpp | 2 +- src/Storages/StorageMerge.cpp | 98 ++++++++++++++++++- src/Storages/StorageMerge.h | 4 +- 4 files changed, 117 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 499dedf2ff5..f4931afe89c 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -961,6 +961,8 @@ Block InterpreterSelectQuery::getSampleBlockImpl() analysis_result = ExpressionAnalysisResult( *query_analyzer, metadata_snapshot, first_stage, second_stage, options.only_analyze, filter_info, additional_filter_info, source_header); + LOG_TRACE(log, "getSampleBlockImpl {} : source_header after 
ExpressionAnalysisResult {}", (void*) this, source_header.dumpStructure()); + if (options.to_stage == QueryProcessingStage::Enum::FetchColumns) { @@ -970,8 +972,12 @@ Block InterpreterSelectQuery::getSampleBlockImpl() { header = analysis_result.prewhere_info->prewhere_actions->updateHeader(header); if (analysis_result.prewhere_info->remove_prewhere_column) + { + LOG_TRACE(log, "getSampleBlockImpl {} : erasing column {}", (void*) this, analysis_result.prewhere_info->prewhere_column_name); header.erase(analysis_result.prewhere_info->prewhere_column_name); + } } + LOG_TRACE(log, "getSampleBlockImpl {} : returning header", (void*) this); return header; } @@ -1523,13 +1529,15 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

column_name, query_plan.getCurrentDataStream().header.dumpStructure()); + auto row_level_security_step = std::make_unique( query_plan.getCurrentDataStream(), expressions.filter_info->actions, expressions.filter_info->column_name, expressions.filter_info->do_remove_column); - LOG_TRACE(log, "executeImpl, adding Row-level security filter"); row_level_security_step->setStepDescription("Row-level security filter"); query_plan.addStep(std::move(row_level_security_step)); @@ -2072,8 +2080,9 @@ void InterpreterSelectQuery::addPrewhereAliasActions() if (!expressions.prewhere_info) { - LOG_TRACE(&Poco::Logger::get("addPrewhereAliasActions"), " {}, expressions.filter_info 1", (void*)this); const bool does_storage_support_prewhere = !input_pipe && storage && storage->supportsPrewhere(); + LOG_TRACE(&Poco::Logger::get("addPrewhereAliasActions"), " {}, expressions.filter_info 1 - does_storage_support_prewhere {} shouldMoveToPrewhere() {}", + (void*)this, does_storage_support_prewhere, shouldMoveToPrewhere()); if (does_storage_support_prewhere && shouldMoveToPrewhere()) { LOG_TRACE(&Poco::Logger::get("addPrewhereAliasActions"), " {}, expressions.filter_info 1.5", (void*)this); @@ -2096,6 +2105,14 @@ void InterpreterSelectQuery::addPrewhereAliasActions() expressions.prewhere_info->row_level_filter->projectInput(false); expressions.filter_info = nullptr; } + if (expressions.prewhere_info) + { + LOG_TRACE(&Poco::Logger::get("addPrewhereAliasActions"), " {} dump: {}", (void*)this, expressions.prewhere_info->dump()); + } + else + { + LOG_TRACE(&Poco::Logger::get("addPrewhereAliasActions"), " no prewhere_info"); + } } auto & prewhere_info = analysis_result.prewhere_info; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index a586997360a..927c8fb7440 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -312,7 +312,7 @@ public: const auto & primary_key = storage_snapshot->metadata->getPrimaryKey(); const Names & primary_key_column_names = primary_key.column_names; KeyCondition key_condition(filter, context, primary_key_column_names, primary_key.expression, NameSet{}); - LOG_DEBUG(log, "Key condition: {}", key_condition.toString()); + LOG_DEBUG(log, "ReadFromPart (MergeTreeSequentialSource) Key condition: {}", key_condition.toString()); if (!key_condition.alwaysFalse()) mark_ranges = MergeTreeDataSelectExecutor::markRangesFromPKRange( diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index c0a0ffec7fd..5d13d844eb7 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -318,7 +319,7 @@ void StorageMerge::read( * since there is no certainty that it works when one of table is MergeTree and other is not. */ auto modified_context = Context::createCopy(local_context); - modified_context->setSetting("optimize_move_to_prewhere", false); + // modified_context->setSetting("optimize_move_to_prewhere", false); LOG_TRACE(&Poco::Logger::get("StorageMerge::read"), "processed_stage {}", QueryProcessingStage::toString(processed_stage)); @@ -832,7 +833,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( /// Subordinary tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. 
- convertingSourceStream(header, storage_snapshot->metadata, aliases, modified_context, *builder, processed_stage); + convertingSourceStream(header, storage_snapshot->metadata, aliases, modified_context, *builder, processed_stage, database_name, table_name); } return builder; @@ -1014,7 +1015,9 @@ void ReadFromMerge::convertingSourceStream( const Aliases & aliases, ContextPtr local_context, QueryPipelineBuilder & builder, - const QueryProcessingStage::Enum & processed_stage) + const QueryProcessingStage::Enum & processed_stage, + const String & database_name, + const String & table_name) { Block before_block_header = builder.getHeader(); @@ -1051,7 +1054,7 @@ void ReadFromMerge::convertingSourceStream( for (const auto & column_with_type_and_name : header.getColumnsWithTypeAndName()) { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "column name: {} (header.getColumnsWithTypeAndName())", column_with_type_and_name.name); + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertingSourceStream"), "column name: {} (header.getColumnsWithTypeAndName())", column_with_type_and_name.name); } @@ -1068,6 +1071,93 @@ void ReadFromMerge::convertingSourceStream( { return std::make_shared(stream_header, actions); }); + + + bool explicit_row_policy_filter_needed = true; + + if (explicit_row_policy_filter_needed) + { + + auto row_policy_filter = local_context->getRowPolicyFilter(database_name, table_name, RowPolicyFilterType::SELECT_FILTER); + + // row_policy_filter->expression + // auto pipe_columns = builder.getHeader().getNamesAndTypesList(); + + + ASTPtr expr = row_policy_filter->expression; + + // auto * select_ast = expr /* query_ast */ ->as(); + // assert(select_ast); + + // select_ast->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared()); + // auto expr_list = select_ast->select(); + // expr_list->children.push_back(expr); + // String filter_column_name = expr_list->children.at(0)->getColumnName(); + // LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_column_name: {} ", filter_column_name); + + auto syntax_result = TreeRewriter(local_context).analyze(expr, pipe_columns); + // auto syntax_result = TreeRewriter(local_context).analyze(expr, NamesAndTypesList()); + auto expression_analyzer = ExpressionAnalyzer{row_policy_filter->expression, syntax_result, local_context}; + + auto actions_dag = expression_analyzer.getActionsDAG(true, false); + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "actions_dag: {},<> {}", actions_dag->dumpNames(), actions_dag->dumpDAG()); + + + + auto filter_actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); + auto required_columns = filter_actions->getRequiredColumns(); + for (const auto & req_col : required_columns) + { + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "req_col: {}", req_col); + } + + + + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions_dag: {},<> {}, <> {}", + filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); + + + auto fa_actions_columns_sorted = filter_actions->getSampleBlock().getNames(); + std::sort(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end()); + + Names required_columns_sorted = required_columns; + std::sort(required_columns_sorted.begin(), required_columns_sorted.end()); + + Names filter_columns; + + + 
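            /// Editor's illustrative note (not part of the patch): the set difference below extracts the
            /// name of the column produced by the row policy expression itself (the filter actions'
            /// output columns minus their required input columns); that name is then used as the filter
            /// column of the FilterTransform added a few lines later.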
std::set_difference(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end(), + required_columns.begin(), required_columns.end(), + std::inserter(filter_columns, filter_columns.begin())); + + for (const auto & filter_column : filter_columns) + { + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_column: {}", filter_column); + } + + // Block block; + // block = filter_actions->getActionsDAG().updateHeader(std::move(block)); + // LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertingSourceStream"), "block from updateHeader {}", block.dumpStructure()); + + + + builder.addSimpleTransform([&](const Block & stream_header) + { + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "stream_header {}", stream_header.dumpStructure()); + return std::make_shared(stream_header, filter_actions, filter_columns.front(), true /* remove fake column */); + }); + + + // auto row_level_filter_step = std::make_unique( + // query_plan.getCurrentDataStream(), + // expressions.prewhere_info->row_level_filter, + // expressions.prewhere_info->row_level_column_name, + // true); + + // row_level_filter_step->setStepDescription("Row-level security filter (PREWHERE)"); + // query_plan.addStep(std::move(row_level_filter_step)); + + } } bool ReadFromMerge::requestReadingInOrder(InputOrderInfoPtr order_info_) diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index babf0dd92e8..fbe6dcec298 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -201,7 +201,9 @@ private: const Aliases & aliases, ContextPtr context, QueryPipelineBuilder & builder, - const QueryProcessingStage::Enum & processed_stage); + const QueryProcessingStage::Enum & processed_stage, + const String & database_name, + const String & table_name); }; } From 657c39c79e39c9df61f094114cf89e212b0671d5 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Mon, 5 Jun 2023 16:00:31 +0000 Subject: [PATCH 0064/1097] merge_row_policy: row_policy_storage_merge test --- src/Storages/StorageMerge.cpp | 152 +++++++++--------- .../02763_row_policy_storage_merge.reference | 114 +++++++++++++ .../02763_row_policy_storage_merge.sql.j2 | 60 +++++++ 3 files changed, 253 insertions(+), 73 deletions(-) create mode 100644 tests/queries/0_stateless/02763_row_policy_storage_merge.reference create mode 100644 tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 5d13d844eb7..6d93d88804c 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1080,83 +1080,89 @@ void ReadFromMerge::convertingSourceStream( auto row_policy_filter = local_context->getRowPolicyFilter(database_name, table_name, RowPolicyFilterType::SELECT_FILTER); - // row_policy_filter->expression - // auto pipe_columns = builder.getHeader().getNamesAndTypesList(); - - - ASTPtr expr = row_policy_filter->expression; - - // auto * select_ast = expr /* query_ast */ ->as(); - // assert(select_ast); - - // select_ast->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared()); - // auto expr_list = select_ast->select(); - // expr_list->children.push_back(expr); - // String filter_column_name = expr_list->children.at(0)->getColumnName(); - // LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_column_name: {} ", filter_column_name); - - auto syntax_result = TreeRewriter(local_context).analyze(expr, pipe_columns); - // auto syntax_result = 
TreeRewriter(local_context).analyze(expr, NamesAndTypesList()); - auto expression_analyzer = ExpressionAnalyzer{row_policy_filter->expression, syntax_result, local_context}; - - auto actions_dag = expression_analyzer.getActionsDAG(true, false); - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "actions_dag: {},<> {}", actions_dag->dumpNames(), actions_dag->dumpDAG()); - - - - auto filter_actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - auto required_columns = filter_actions->getRequiredColumns(); - for (const auto & req_col : required_columns) + if (row_policy_filter) { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "req_col: {}", req_col); + + + // row_policy_filter->expression + // auto pipe_columns = builder.getHeader().getNamesAndTypesList(); + + + ASTPtr expr = row_policy_filter->expression; + + // auto * select_ast = expr /* query_ast */ ->as(); + // assert(select_ast); + + // select_ast->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared()); + // auto expr_list = select_ast->select(); + // expr_list->children.push_back(expr); + // String filter_column_name = expr_list->children.at(0)->getColumnName(); + // LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_column_name: {} ", filter_column_name); + + auto syntax_result = TreeRewriter(local_context).analyze(expr, pipe_columns); + // auto syntax_result = TreeRewriter(local_context).analyze(expr, NamesAndTypesList()); + auto expression_analyzer = ExpressionAnalyzer{row_policy_filter->expression, syntax_result, local_context}; + + auto actions_dag = expression_analyzer.getActionsDAG(true, false); + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "actions_dag: {},<> {}", actions_dag->dumpNames(), actions_dag->dumpDAG()); + + + + auto filter_actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); + auto required_columns = filter_actions->getRequiredColumns(); + for (const auto & req_col : required_columns) + { + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "req_col: {}", req_col); + } + + + + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions_dag: {},<> {}, <> {}", + filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); + + + auto fa_actions_columns_sorted = filter_actions->getSampleBlock().getNames(); + std::sort(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end()); + + Names required_columns_sorted = required_columns; + std::sort(required_columns_sorted.begin(), required_columns_sorted.end()); + + Names filter_columns; + + + std::set_difference(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end(), + required_columns.begin(), required_columns.end(), + std::inserter(filter_columns, filter_columns.begin())); + + for (const auto & filter_column : filter_columns) + { + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_column: {}", filter_column); + } + + // Block block; + // block = filter_actions->getActionsDAG().updateHeader(std::move(block)); + // LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertingSourceStream"), "block from updateHeader {}", block.dumpStructure()); + + + + builder.addSimpleTransform([&](const Block & stream_header) + { + 
LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "stream_header {}", stream_header.dumpStructure()); + return std::make_shared(stream_header, filter_actions, filter_columns.front(), true /* remove fake column */); + }); + + + // auto row_level_filter_step = std::make_unique( + // query_plan.getCurrentDataStream(), + // expressions.prewhere_info->row_level_filter, + // expressions.prewhere_info->row_level_column_name, + // true); + + // row_level_filter_step->setStepDescription("Row-level security filter (PREWHERE)"); + // query_plan.addStep(std::move(row_level_filter_step)); } - - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions_dag: {},<> {}, <> {}", - filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); - - - auto fa_actions_columns_sorted = filter_actions->getSampleBlock().getNames(); - std::sort(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end()); - - Names required_columns_sorted = required_columns; - std::sort(required_columns_sorted.begin(), required_columns_sorted.end()); - - Names filter_columns; - - - std::set_difference(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end(), - required_columns.begin(), required_columns.end(), - std::inserter(filter_columns, filter_columns.begin())); - - for (const auto & filter_column : filter_columns) - { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_column: {}", filter_column); - } - - // Block block; - // block = filter_actions->getActionsDAG().updateHeader(std::move(block)); - // LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertingSourceStream"), "block from updateHeader {}", block.dumpStructure()); - - - - builder.addSimpleTransform([&](const Block & stream_header) - { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "stream_header {}", stream_header.dumpStructure()); - return std::make_shared(stream_header, filter_actions, filter_columns.front(), true /* remove fake column */); - }); - - - // auto row_level_filter_step = std::make_unique( - // query_plan.getCurrentDataStream(), - // expressions.prewhere_info->row_level_filter, - // expressions.prewhere_info->row_level_column_name, - // true); - - // row_level_filter_step->setStepDescription("Row-level security filter (PREWHERE)"); - // query_plan.addStep(std::move(row_level_filter_step)); - } } diff --git a/tests/queries/0_stateless/02763_row_policy_storage_merge.reference b/tests/queries/0_stateless/02763_row_policy_storage_merge.reference new file mode 100644 index 00000000000..ab531c5e300 --- /dev/null +++ b/tests/queries/0_stateless/02763_row_policy_storage_merge.reference @@ -0,0 +1,114 @@ +1 +1 +1 +1 +2 +2 +2 +2 +3 +3 +3 +3 +4 +4 +4 +4 +1 +2 +3 +4 +SETTINGS optimize_move_to_prewhere= 0 +SELECT * FROM 02763_merge_log_1 +3 +SELECT * FROM merge(currentDatabase(), 02763_merge_log_1) +3 +SELECT * FROM merge(currentDatabase(), 02763_merge_log) +1 +2 +3 +3 +4 +SELECT * FROM merge(currentDatabase(), 02763_merge_log) WHERE x>2 +3 +3 +4 +SELECT * FROM 02763_merge_merge_1 +4 +SELECT * FROM merge(currentDatabase(), 02763_merge_merge_1 +4 +SELECT * FROM merge(currentDatabase(), 02763_merge_merge) +1 +2 +3 +4 +4 +SELECT * FROM merge(currentDatabase(), 02763_merge_merge) WHERE x>2 +3 +4 +4 +SELECT * FROM merge(currentDatabase(), 02763_merge) +1 +1 +2 +2 +3 +3 +3 +4 +4 +4 +SELECT * FROM merge(currentDatabase(), 02763_merge) WHEER x>2 +3 +3 +3 +4 +4 +4 +SETTINGS 
optimize_move_to_prewhere= 1 +SELECT * FROM 02763_merge_log_1 +3 +SELECT * FROM merge(currentDatabase(), 02763_merge_log_1) +3 +SELECT * FROM merge(currentDatabase(), 02763_merge_log) +1 +2 +3 +3 +4 +SELECT * FROM merge(currentDatabase(), 02763_merge_log) WHERE x>2 +3 +3 +4 +SELECT * FROM 02763_merge_merge_1 +4 +SELECT * FROM merge(currentDatabase(), 02763_merge_merge_1 +4 +SELECT * FROM merge(currentDatabase(), 02763_merge_merge) +1 +2 +3 +4 +4 +SELECT * FROM merge(currentDatabase(), 02763_merge_merge) WHERE x>2 +3 +4 +4 +SELECT * FROM merge(currentDatabase(), 02763_merge) +1 +1 +2 +2 +3 +3 +3 +4 +4 +4 +SELECT * FROM merge(currentDatabase(), 02763_merge) WHEER x>2 +3 +3 +3 +4 +4 +4 diff --git a/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 b/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 new file mode 100644 index 00000000000..3883b357602 --- /dev/null +++ b/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 @@ -0,0 +1,60 @@ +DROP TABLE IF EXISTS 02763_merge_log_1; +DROP TABLE IF EXISTS 02763_merge_log_2; +DROP TABLE IF EXISTS 02763_merge_merge_1; +DROP TABLE IF EXISTS 02763_merge_merge_2; +DROP ROW POLICY IF EXISTS 02763_filter_1 ON 02763_merge_log_1; +DROP ROW POLICY IF EXISTS 02763_filter_2 ON 02763_merge_merge_1; + + +CREATE TABLE 02763_merge_log_1 (x UInt8) ENGINE = Log; +CREATE TABLE 02763_merge_log_2 (x UInt8) ENGINE = Log; + +CREATE TABLE 02763_merge_merge_1 (x UInt8) ENGINE = MergeTree ORDER BY x; +CREATE TABLE 02763_merge_merge_2 (x UInt8) ENGINE = MergeTree ORDER BY x; + +INSERT INTO 02763_merge_log_1 VALUES (1), (2), (3), (4); +INSERT INTO 02763_merge_log_2 VALUES (1), (2), (3), (4); +INSERT INTO 02763_merge_merge_1 VALUES (1), (2), (3), (4); +INSERT INTO 02763_merge_merge_2 VALUES (1), (2), (3), (4); + +SELECT * FROM merge(currentDatabase(), '02763_merge') ORDER BY x; + +SELECT * FROM 02763_merge_log_1 ORDER BY x; + + +{% for prew in [0 , 1] -%} + +SELECT 'SETTINGS optimize_move_to_prewhere= {{prew}}'; + +CREATE ROW POLICY 02763_filter_1 ON 02763_merge_log_1 USING x=3 AS permissive TO ALL; + +SELECT 'SELECT * FROM 02763_merge_log_1'; +SELECT * FROM 02763_merge_log_1 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; +SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge_log_1)'; +SELECT * FROM merge(currentDatabase(), '02763_merge_log_1') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; +SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge_log)'; +SELECT * FROM merge(currentDatabase(), '02763_merge_log') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; +SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge_log) WHERE x>2'; +SELECT * FROM merge(currentDatabase(), '02763_merge_log') WHERE x>2 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; + +CREATE ROW POLICY 02763_filter_2 ON 02763_merge_merge_1 USING x=4 AS permissive TO ALL; + +SELECT 'SELECT * FROM 02763_merge_merge_1'; +SELECT * FROM 02763_merge_merge_1 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; +SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge_merge_1'; +SELECT * FROM merge(currentDatabase(), '02763_merge_merge_1') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; +SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge_merge)'; +SELECT * FROM merge(currentDatabase(), '02763_merge_merge') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; +SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge_merge) WHERE x>2'; +SELECT * FROM merge(currentDatabase(), '02763_merge_merge') WHERE 
x>2 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; + + +SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge)'; +SELECT * FROM merge(currentDatabase(), '02763_merge') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; +SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge) WHEER x>2'; +SELECT * FROM merge(currentDatabase(), '02763_merge') WHERE x>2 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; + +DROP ROW POLICY 02763_filter_1 ON 02763_merge_log_1; +DROP ROW POLICY 02763_filter_2 ON 02763_merge_merge_1; + +{% endfor %} From 359cd4d32ae292392aeccadf2d2dadbe689f2f38 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Tue, 6 Jun 2023 20:45:48 +0000 Subject: [PATCH 0065/1097] merge_row_policy: style fixes and back to storage->read --- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Storages/StorageMerge.cpp | 40 ++++++++++----------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index f4931afe89c..fb472cb791c 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1529,7 +1529,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

column_name, query_plan.getCurrentDataStream().header.dumpStructure()); auto row_level_security_step = std::make_unique( diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 6d93d88804c..9b33f3aa268 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -369,7 +369,7 @@ void StorageMerge::read( /// What will be result structure depending on query processed stage in source tables? // Block common_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, QueryProcessingStage::Complete /* processed_stage */); - Block common_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, processed_stage ); + Block common_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, processed_stage); auto step = std::make_unique( common_header, @@ -677,7 +677,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( QueryPlan & plan = child_plans.emplace_back(); StorageView * view = dynamic_cast(storage.get()); - if (/* !view || */ allow_experimental_analyzer) + if ( !view || allow_experimental_analyzer) // if (!view || allow_experimental_analyzer) { LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "direct storage->read"); @@ -690,26 +690,26 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( max_block_size, UInt32(streams_num)); } - else if (!view) - { - /// For view storage, we need to rewrite the `modified_query_info.view_query` to optimize read. - /// The most intuitive way is to use InterpreterSelectQuery. + // else if (!view) + // { + // /// For view storage, we need to rewrite the `modified_query_info.view_query` to optimize read. + // /// The most intuitive way is to use InterpreterSelectQuery. - /// Intercept the settings - modified_context->setSetting("max_threads", streams_num); - modified_context->setSetting("max_streams_to_max_threads_ratio", 1); - modified_context->setSetting("max_block_size", max_block_size); + // /// Intercept the settings + // modified_context->setSetting("max_threads", streams_num); + // modified_context->setSetting("max_streams_to_max_threads_ratio", 1); + // modified_context->setSetting("max_block_size", max_block_size); - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "creating InterpreterSelectQuery 1.0"); - InterpreterSelectQuery interpreter(modified_query_info.query, - modified_context, - storage, - storage->getInMemoryMetadataPtr(), // view->getInMemoryMetadataPtr(), - // SelectQueryOptions(/* processed_stage*/)); - SelectQueryOptions(processed_stage)); - // SelectQueryOptions(QueryProcessingStage::WithMergeableState)); - interpreter.buildQueryPlan(plan); - } + // LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "creating InterpreterSelectQuery 1.0"); + // InterpreterSelectQuery interpreter(modified_query_info.query, + // modified_context, + // storage, + // storage->getInMemoryMetadataPtr(), // view->getInMemoryMetadataPtr(), + // // SelectQueryOptions(/* processed_stage*/)); + // SelectQueryOptions(processed_stage)); + // // SelectQueryOptions(QueryProcessingStage::WithMergeableState)); + // interpreter.buildQueryPlan(plan); + // } else { /// For view storage, we need to rewrite the `modified_query_info.view_query` to optimize read. 
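Up to this commit the per-table row policy is applied only after the child pipeline is built: convertingSourceStream() turns the policy expression into an ActionsDAG (getRowPolicyFilter -> TreeRewriter::analyze -> ExpressionAnalyzer::getActionsDAG) and appends a FilterTransform to the pipeline. The next commit experiments with handing the same DAG to the reading step instead, so the source can take the predicate into account while reading. A rough sketch of the two attachment points follows; filter_actions, filter_dag and filter_column_name are illustrative names, not exact copies of the variables in the patches:

    // Variant used so far: filter rows after the source has produced them.
    builder.addSimpleTransform([&](const Block & stream_header)
    {
        // FilterTransform evaluates filter_actions and drops non-matching rows;
        // the last argument removes the temporary filter column from the output.
        return std::make_shared<FilterTransform>(
            stream_header, filter_actions, filter_column_name, /* remove_filter_column = */ true);
    });

    // Variant introduced below: push the predicate into the source step itself,
    // so a step such as ReadFromMergeTree can use it for PREWHERE / index analysis.
    if (auto * source_step = dynamic_cast<SourceStepWithFilter *>(plan.getRootNode()->step.get()))
        source_step->addFilter(filter_dag, filter_column_name);

Both variants rely on the same policy lookup; the commits that follow mainly move where the resulting filter is attached.
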
From c457fa727e358d77802a1da1540caa16ae64b6a9 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Fri, 9 Jun 2023 09:03:47 +0000 Subject: [PATCH 0066/1097] merge_row_policy: addFilter() instead of FilterTransform --- src/Storages/StorageMerge.cpp | 71 ++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 9b33f3aa268..c29c2af9b12 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -677,6 +677,8 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( QueryPlan & plan = child_plans.emplace_back(); StorageView * view = dynamic_cast(storage.get()); + bool direct_read = false; + if ( !view || allow_experimental_analyzer) // if (!view || allow_experimental_analyzer) { @@ -689,6 +691,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( processed_stage, max_block_size, UInt32(streams_num)); + direct_read = true; } // else if (!view) // { @@ -742,10 +745,74 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( read_from_merge_tree->addFilter(filter_dags[i], filter_nodes.nodes[i]); } } - builder = plan.buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(modified_context), BuildQueryPipelineSettings::fromContext(modified_context)); + if (auto * source_step_with_filter = typeid_cast(plan.getRootNode()->step.get())) + { + auto row_policy_filter = modified_context->getRowPolicyFilter(database_name, table_name, RowPolicyFilterType::SELECT_FILTER); + + if (row_policy_filter) + { + + + // row_policy_filter->expression + // auto pipe_columns = builder.getHeader().getNamesAndTypesList(); + + + ASTPtr expr = row_policy_filter->expression; + + // auto * select_ast = expr /* query_ast */ ->as(); + // assert(select_ast); + + // select_ast->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared()); + // auto expr_list = select_ast->select(); + // expr_list->children.push_back(expr); + // String filter_column_name = expr_list->children.at(0)->getColumnName(); + // LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_column_name: {} ", filter_column_name); + + auto syntax_result = TreeRewriter(modified_context).analyze(expr, builder->getHeader().getNamesAndTypesList() /* pipe_columns*/); + // auto syntax_result = TreeRewriter(local_context).analyze(expr, NamesAndTypesList()); + auto expression_analyzer = ExpressionAnalyzer{row_policy_filter->expression, syntax_result, modified_context}; + + auto filter_dag_ptr = expression_analyzer.getActionsDAG(true, false); + + + auto filter_actions = std::make_shared(filter_dag_ptr, ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); + auto required_columns = filter_actions->getRequiredColumns(); + + for (const auto & req_col : required_columns) + { + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "req_col: {}", req_col); + } + + + + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions_dag: {},<> {}, <> {}", + filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); + + + auto fa_actions_columns_sorted = filter_actions->getSampleBlock().getNames(); + std::sort(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end()); + + Names required_columns_sorted = required_columns; + std::sort(required_columns_sorted.begin(), required_columns_sorted.end()); + + Names filter_columns; + + + 
std::set_difference(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end(), + required_columns.begin(), required_columns.end(), + std::inserter(filter_columns, filter_columns.begin())); + + + + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "SourceStepWithFilter detected"); + source_step_with_filter->addFilter(filter_dag_ptr, filter_columns.front()); + } + } + + } else if (processed_stage > storage_stage || (allow_experimental_analyzer && processed_stage != QueryProcessingStage::FetchColumns)) { @@ -1073,7 +1140,7 @@ void ReadFromMerge::convertingSourceStream( }); - bool explicit_row_policy_filter_needed = true; + bool explicit_row_policy_filter_needed = false; if (explicit_row_policy_filter_needed) { From a19cd8089b8620cf12e238181679a6492af30607 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Tue, 13 Jun 2023 17:19:20 +0000 Subject: [PATCH 0067/1097] merge_row_policy: add_filter() actually works --- src/Interpreters/InterpreterSelectQuery.cpp | 1 - src/Storages/StorageMerge.cpp | 39 ++++++++------------- 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index fb472cb791c..22f0feb195f 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -561,7 +561,6 @@ InterpreterSelectQuery::InterpreterSelectQuery( } - StorageView * view = nullptr; if (storage) view = dynamic_cast(storage.get()); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index c29c2af9b12..5942c49237a 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -668,6 +669,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( modified_query_info); #pragma GCC diagnostic ignored "-Wunreachable-code" +#pragma GCC diagnostic ignored "-Wunused-variable" if (processed_stage <= storage_stage || (allow_experimental_analyzer && processed_stage == QueryProcessingStage::FetchColumns)) { /// If there are only virtual columns in query, you must request at least one other column. 
@@ -679,7 +681,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( StorageView * view = dynamic_cast(storage.get()); bool direct_read = false; - if ( !view || allow_experimental_analyzer) + if (!view || allow_experimental_analyzer) // if (!view || allow_experimental_analyzer) { LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "direct storage->read"); @@ -741,25 +743,18 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "ReadFromMergeTree detected, DAG size {}", filters_dags_size); for (size_t i = 0; i < filters_dags_size; ++i) { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "adding filter"); + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "adding filter {}", filter_dags[i]->dumpDAG()); read_from_merge_tree->addFilter(filter_dags[i], filter_nodes.nodes[i]); } } - builder = plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(modified_context), - BuildQueryPipelineSettings::fromContext(modified_context)); - if (auto * source_step_with_filter = typeid_cast(plan.getRootNode()->step.get())) + if (auto * source_step_with_filter = dynamic_cast((plan.getRootNode()->step.get()))) { auto row_policy_filter = modified_context->getRowPolicyFilter(database_name, table_name, RowPolicyFilterType::SELECT_FILTER); if (row_policy_filter) { - - // row_policy_filter->expression // auto pipe_columns = builder.getHeader().getNamesAndTypesList(); - - ASTPtr expr = row_policy_filter->expression; // auto * select_ast = expr /* query_ast */ ->as(); @@ -771,7 +766,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( // String filter_column_name = expr_list->children.at(0)->getColumnName(); // LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_column_name: {} ", filter_column_name); - auto syntax_result = TreeRewriter(modified_context).analyze(expr, builder->getHeader().getNamesAndTypesList() /* pipe_columns*/); + auto syntax_result = TreeRewriter(modified_context).analyze(expr, header/*builder->getHeader().*/.getNamesAndTypesList() /* pipe_columns*/); // auto syntax_result = TreeRewriter(local_context).analyze(expr, NamesAndTypesList()); auto expression_analyzer = ExpressionAnalyzer{row_policy_filter->expression, syntax_result, modified_context}; @@ -787,7 +782,6 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( } - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions_dag: {},<> {}, <> {}", filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); @@ -806,12 +800,18 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( std::inserter(filter_columns, filter_columns.begin())); - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "SourceStepWithFilter detected"); - source_step_with_filter->addFilter(filter_dag_ptr, filter_columns.front()); + auto found_column = filter_dag_ptr->tryFindInOutputs(filter_columns.front()); + assert(found_column); + + // LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "found column {}", found_column->dumpDAG()); + + source_step_with_filter->addFilter(/* filter_actions */ filter_dag_ptr, filter_columns.front()); } } - + builder = plan.buildQueryPipeline( + QueryPlanOptimizationSettings::fromContext(modified_context), + BuildQueryPipelineSettings::fromContext(modified_context)); } else if (processed_stage > storage_stage || (allow_experimental_analyzer && processed_stage != 
QueryProcessingStage::FetchColumns)) @@ -1124,9 +1124,6 @@ void ReadFromMerge::convertingSourceStream( LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertingSourceStream"), "column name: {} (header.getColumnsWithTypeAndName())", column_with_type_and_name.name); } - - - auto convert_actions_dag = ActionsDAG::makeConvertingActions(builder.getHeader().getColumnsWithTypeAndName(), header.getColumnsWithTypeAndName(), convert_actions_match_columns_mode); @@ -1150,7 +1147,6 @@ void ReadFromMerge::convertingSourceStream( if (row_policy_filter) { - // row_policy_filter->expression // auto pipe_columns = builder.getHeader().getNamesAndTypesList(); @@ -1174,7 +1170,6 @@ void ReadFromMerge::convertingSourceStream( LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "actions_dag: {},<> {}", actions_dag->dumpNames(), actions_dag->dumpDAG()); - auto filter_actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); auto required_columns = filter_actions->getRequiredColumns(); for (const auto & req_col : required_columns) @@ -1183,7 +1178,6 @@ void ReadFromMerge::convertingSourceStream( } - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions_dag: {},<> {}, <> {}", filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); @@ -1210,8 +1204,6 @@ void ReadFromMerge::convertingSourceStream( // block = filter_actions->getActionsDAG().updateHeader(std::move(block)); // LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertingSourceStream"), "block from updateHeader {}", block.dumpStructure()); - - builder.addSimpleTransform([&](const Block & stream_header) { LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "stream_header {}", stream_header.dumpStructure()); @@ -1229,7 +1221,6 @@ void ReadFromMerge::convertingSourceStream( // query_plan.addStep(std::move(row_level_filter_step)); } - } } From 978a535849da2266f186b6e10ccea1a5fdd3d8d6 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Wed, 14 Jun 2023 06:29:41 +0000 Subject: [PATCH 0068/1097] merge_row_policy: addFilter(() together with FilterTransform --- src/Storages/StorageMerge.cpp | 2 +- .../02763_row_policy_storage_merge.reference | 28 +++++++++++++++++-- .../02763_row_policy_storage_merge.sql.j2 | 23 ++++++++------- 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 5942c49237a..e1ff6cb1091 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1137,7 +1137,7 @@ void ReadFromMerge::convertingSourceStream( }); - bool explicit_row_policy_filter_needed = false; + bool explicit_row_policy_filter_needed = true; if (explicit_row_policy_filter_needed) { diff --git a/tests/queries/0_stateless/02763_row_policy_storage_merge.reference b/tests/queries/0_stateless/02763_row_policy_storage_merge.reference index ab531c5e300..9dcc5f449ab 100644 --- a/tests/queries/0_stateless/02763_row_policy_storage_merge.reference +++ b/tests/queries/0_stateless/02763_row_policy_storage_merge.reference @@ -58,7 +58,19 @@ SELECT * FROM merge(currentDatabase(), 02763_merge) 4 4 4 -SELECT * FROM merge(currentDatabase(), 02763_merge) WHEER x>2 +SELECT * FROM merge(currentDatabase(), 02763_merge) WHERE x>2 +3 +3 +3 +4 +4 +4 +aaa 6 +aaa 6 +aaa 6 +aaa 8 +aaa 8 +aaa 8 3 3 3 @@ -105,7 +117,19 @@ SELECT * FROM merge(currentDatabase(), 02763_merge) 4 4 4 -SELECT * FROM 
merge(currentDatabase(), 02763_merge) WHEER x>2 +SELECT * FROM merge(currentDatabase(), 02763_merge) WHERE x>2 +3 +3 +3 +4 +4 +4 +aaa 6 +aaa 6 +aaa 6 +aaa 8 +aaa 8 +aaa 8 3 3 3 diff --git a/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 b/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 index 3883b357602..33b02275d4a 100644 --- a/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 +++ b/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 @@ -31,28 +31,31 @@ CREATE ROW POLICY 02763_filter_1 ON 02763_merge_log_1 USING x=3 AS permissive TO SELECT 'SELECT * FROM 02763_merge_log_1'; SELECT * FROM 02763_merge_log_1 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge_log_1)'; -SELECT * FROM merge(currentDatabase(), '02763_merge_log_1') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; +SELECT * FROM merge(currentDatabase(), '02763_merge_log_1') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge_log)'; -SELECT * FROM merge(currentDatabase(), '02763_merge_log') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; +SELECT * FROM merge(currentDatabase(), '02763_merge_log') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge_log) WHERE x>2'; -SELECT * FROM merge(currentDatabase(), '02763_merge_log') WHERE x>2 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; +SELECT * FROM merge(currentDatabase(), '02763_merge_log') WHERE x>2 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; CREATE ROW POLICY 02763_filter_2 ON 02763_merge_merge_1 USING x=4 AS permissive TO ALL; SELECT 'SELECT * FROM 02763_merge_merge_1'; -SELECT * FROM 02763_merge_merge_1 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; +SELECT * FROM 02763_merge_merge_1 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge_merge_1'; -SELECT * FROM merge(currentDatabase(), '02763_merge_merge_1') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; +SELECT * FROM merge(currentDatabase(), '02763_merge_merge_1') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge_merge)'; -SELECT * FROM merge(currentDatabase(), '02763_merge_merge') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; +SELECT * FROM merge(currentDatabase(), '02763_merge_merge') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge_merge) WHERE x>2'; -SELECT * FROM merge(currentDatabase(), '02763_merge_merge') WHERE x>2 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; +SELECT * FROM merge(currentDatabase(), '02763_merge_merge') WHERE x>2 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge)'; -SELECT * FROM merge(currentDatabase(), '02763_merge') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; -SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge) WHEER x>2'; -SELECT * FROM merge(currentDatabase(), '02763_merge') WHERE x>2 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}};; +SELECT * FROM merge(currentDatabase(), '02763_merge') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; +SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge) WHERE x>2'; +SELECT * FROM merge(currentDatabase(), '02763_merge') WHERE x>2 
ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; + +SELECT 'aaa', x*2 as x_2 FROM merge(currentDatabase(), '02763_merge') WHERE x>2 ORDER BY x_2 SETTINGS optimize_move_to_prewhere= {{prew}}; +SELECT x FROM (SELECT * FROM merge(currentDatabase(), '02763_merge') WHERE x IN (3,4)) ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; DROP ROW POLICY 02763_filter_1 ON 02763_merge_log_1; DROP ROW POLICY 02763_filter_2 ON 02763_merge_merge_1; From da5f607242d1ff97d6380a600ac666651a9b08cc Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Wed, 14 Jun 2023 21:31:17 +0000 Subject: [PATCH 0069/1097] merge_row_policy: cleanup, remove some debug output --- src/Interpreters/InterpreterSelectQuery.cpp | 41 +--- .../MergeTree/MergeTreeSequentialSource.cpp | 2 +- src/Storages/StorageMerge.cpp | 202 ++++-------------- 3 files changed, 43 insertions(+), 202 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 22f0feb195f..e1faa8c8958 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -93,14 +93,12 @@ #include #include -#include namespace ProfileEvents { extern const Event SelectQueriesWithSubqueries; extern const Event QueriesWithSubqueries; } -#pragma GCC diagnostic ignored "-Wold-style-cast" namespace DB { @@ -134,9 +132,6 @@ FilterDAGInfoPtr generateFilterActions( Names & prerequisite_columns, PreparedSetsPtr prepared_sets) { - LOG_TRACE(&Poco::Logger::get("generateFilterActions"), "top of"); - - auto filter_info = std::make_shared(); const auto & db_name = table_id.getDatabaseName(); @@ -551,15 +546,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( std::shared_ptr table_join = joined_tables.makeTableJoin(query); if (storage) - { - LOG_TRACE(&Poco::Logger::get("InterpretSelectQuery ctor"), " {}, table name: {}, calling getRowPolicyFilter", (void*)this, table_id.getTableName()); row_policy_filter = context->getRowPolicyFilter(table_id.getDatabaseName(), table_id.getTableName(), RowPolicyFilterType::SELECT_FILTER); - } - else - { - LOG_TRACE(&Poco::Logger::get("InterpretSelectQuery ctor"), " {}, no storage", (void*)this); - } - StorageView * view = nullptr; if (storage) @@ -863,8 +850,6 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Add prewhere actions with alias columns and record needed columns from storage. 
if (storage) { - LOG_TRACE(log, "calling addPrewhereAliasActions"); - addPrewhereAliasActions(); analysis_result.required_columns = required_columns; } @@ -960,8 +945,6 @@ Block InterpreterSelectQuery::getSampleBlockImpl() analysis_result = ExpressionAnalysisResult( *query_analyzer, metadata_snapshot, first_stage, second_stage, options.only_analyze, filter_info, additional_filter_info, source_header); - LOG_TRACE(log, "getSampleBlockImpl {} : source_header after ExpressionAnalysisResult {}", (void*) this, source_header.dumpStructure()); - if (options.to_stage == QueryProcessingStage::Enum::FetchColumns) { @@ -971,12 +954,8 @@ Block InterpreterSelectQuery::getSampleBlockImpl() { header = analysis_result.prewhere_info->prewhere_actions->updateHeader(header); if (analysis_result.prewhere_info->remove_prewhere_column) - { - LOG_TRACE(log, "getSampleBlockImpl {} : erasing column {}", (void*) this, analysis_result.prewhere_info->prewhere_column_name); header.erase(analysis_result.prewhere_info->prewhere_column_name); - } } - LOG_TRACE(log, "getSampleBlockImpl {} : returning header", (void*) this); return header; } @@ -1462,7 +1441,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

{}", (void*) this, QueryProcessingStage::toString(from_stage), QueryProcessingStage::toString(options.to_stage)); + LOG_TRACE(log, "{} -> {}", QueryProcessingStage::toString(from_stage), QueryProcessingStage::toString(options.to_stage)); } if (query_info.projection && query_info.projection->input_order_info && query_info.input_order_info) @@ -1528,16 +1507,12 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

column_name, query_plan.getCurrentDataStream().header.dumpStructure()); - auto row_level_security_step = std::make_unique( query_plan.getCurrentDataStream(), expressions.filter_info->actions, expressions.filter_info->column_name, expressions.filter_info->do_remove_column); - row_level_security_step->setStepDescription("Row-level security filter"); query_plan.addStep(std::move(row_level_security_step)); } @@ -2075,16 +2050,11 @@ void InterpreterSelectQuery::addPrewhereAliasActions() auto & expressions = analysis_result; if (expressions.filter_info) { - LOG_TRACE(&Poco::Logger::get("addPrewhereAliasActions"), " {}, expressions.filter_info", (void*)this); - if (!expressions.prewhere_info) { const bool does_storage_support_prewhere = !input_pipe && storage && storage->supportsPrewhere(); - LOG_TRACE(&Poco::Logger::get("addPrewhereAliasActions"), " {}, expressions.filter_info 1 - does_storage_support_prewhere {} shouldMoveToPrewhere() {}", - (void*)this, does_storage_support_prewhere, shouldMoveToPrewhere()); if (does_storage_support_prewhere && shouldMoveToPrewhere()) { - LOG_TRACE(&Poco::Logger::get("addPrewhereAliasActions"), " {}, expressions.filter_info 1.5", (void*)this); /// Execute row level filter in prewhere as a part of "move to prewhere" optimization. expressions.prewhere_info = std::make_shared( std::move(expressions.filter_info->actions), @@ -2097,21 +2067,12 @@ void InterpreterSelectQuery::addPrewhereAliasActions() } else { - LOG_TRACE(&Poco::Logger::get("addPrewhereAliasActions"), " {}, expressions.filter_info 2", (void*)this); /// Add row level security actions to prewhere. expressions.prewhere_info->row_level_filter = std::move(expressions.filter_info->actions); expressions.prewhere_info->row_level_column_name = std::move(expressions.filter_info->column_name); expressions.prewhere_info->row_level_filter->projectInput(false); expressions.filter_info = nullptr; } - if (expressions.prewhere_info) - { - LOG_TRACE(&Poco::Logger::get("addPrewhereAliasActions"), " {} dump: {}", (void*)this, expressions.prewhere_info->dump()); - } - else - { - LOG_TRACE(&Poco::Logger::get("addPrewhereAliasActions"), " no prewhere_info"); - } } auto & prewhere_info = analysis_result.prewhere_info; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 927c8fb7440..a586997360a 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -312,7 +312,7 @@ public: const auto & primary_key = storage_snapshot->metadata->getPrimaryKey(); const Names & primary_key_column_names = primary_key.column_names; KeyCondition key_condition(filter, context, primary_key_column_names, primary_key.expression, NameSet{}); - LOG_DEBUG(log, "ReadFromPart (MergeTreeSequentialSource) Key condition: {}", key_condition.toString()); + LOG_DEBUG(log, "Key condition: {}", key_condition.toString()); if (!key_condition.alwaysFalse()) mark_ranges = MergeTreeDataSelectExecutor::markRangesFromPKRange( diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index e1ff6cb1091..60ab9f86a2a 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -47,10 +47,6 @@ #include -#pragma GCC diagnostic ignored "-Wunused-parameter" -#pragma GCC diagnostic ignored "-Wunused-but-set-variable" - - namespace { @@ -270,7 +266,6 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage( /// (see removeJoin()) /// /// And for this we need to return FetchColumns. 
- LOG_TRACE(&Poco::Logger::get("StorageMerge::getQueryProcessingStage"), "to_stage {}", to_stage); if (const auto * select = query_info.query->as(); select && hasJoin(*select)) return QueryProcessingStage::FetchColumns; @@ -295,7 +290,6 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage( stage_in_source_tables, table->getQueryProcessingStage(local_context, to_stage, table->getStorageSnapshot(table->getInMemoryMetadataPtr(), local_context), query_info)); - LOG_TRACE(&Poco::Logger::get("StorageMerge::getQueryProcessingStage"), "stage_in_source_tables {}", stage_in_source_tables); } iterator->next(); @@ -322,9 +316,6 @@ void StorageMerge::read( auto modified_context = Context::createCopy(local_context); // modified_context->setSetting("optimize_move_to_prewhere", false); - LOG_TRACE(&Poco::Logger::get("StorageMerge::read"), "processed_stage {}", QueryProcessingStage::toString(processed_stage)); - - bool has_database_virtual_column = false; bool has_table_virtual_column = false; Names real_column_names; @@ -339,7 +330,6 @@ void StorageMerge::read( else { real_column_names.push_back(column_name); - LOG_TRACE(&Poco::Logger::get("StorageMerge::read"), "column_name {}", column_name); } } @@ -494,7 +484,6 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu bool with_aliases = common_processed_stage == QueryProcessingStage::FetchColumns && !storage_columns.getAliases().empty(); if (with_aliases) { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::initializePipeline"), "with_aliases"); ASTPtr required_columns_expr_list = std::make_shared(); ASTPtr column_expr; @@ -534,8 +523,6 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu } } - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::initializePipeline"), "table name: {}", storage->getStorageID().getTableName()); - auto source_pipeline = createSources( nested_storage_snaphsot, modified_query_info, @@ -668,8 +655,6 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( storage_snapshot, modified_query_info); -#pragma GCC diagnostic ignored "-Wunreachable-code" -#pragma GCC diagnostic ignored "-Wunused-variable" if (processed_stage <= storage_stage || (allow_experimental_analyzer && processed_stage == QueryProcessingStage::FetchColumns)) { /// If there are only virtual columns in query, you must request at least one other column. 
@@ -679,12 +664,9 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( QueryPlan & plan = child_plans.emplace_back(); StorageView * view = dynamic_cast(storage.get()); - bool direct_read = false; if (!view || allow_experimental_analyzer) - // if (!view || allow_experimental_analyzer) { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "direct storage->read"); storage->read(plan, real_column_names, storage_snapshot, @@ -693,28 +675,45 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( processed_stage, max_block_size, UInt32(streams_num)); - direct_read = true; + + + if (!plan.isInitialized()) + return {}; + + if (auto * source_step_with_filter = dynamic_cast((plan.getRootNode()->step.get()))) + { + auto row_policy_filter = modified_context->getRowPolicyFilter(database_name, table_name, RowPolicyFilterType::SELECT_FILTER); + + if (row_policy_filter) + { + ASTPtr expr = row_policy_filter->expression; + + auto syntax_result = TreeRewriter(modified_context).analyze(expr, header.getNamesAndTypesList()); + auto expression_analyzer = ExpressionAnalyzer{row_policy_filter->expression, syntax_result, modified_context}; + + auto filter_dag_ptr = expression_analyzer.getActionsDAG(true, false); + auto filter_actions = std::make_shared(filter_dag_ptr, ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); + auto required_columns = filter_actions->getRequiredColumns(); + + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions_dag: {},<> {}, <> {}", + filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); + + auto fa_actions_columns_sorted = filter_actions->getSampleBlock().getNames(); + std::sort(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end()); + + Names required_columns_sorted = required_columns; + std::sort(required_columns_sorted.begin(), required_columns_sorted.end()); + + Names filter_columns; + + std::set_difference(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end(), + required_columns.begin(), required_columns.end(), + std::inserter(filter_columns, filter_columns.begin())); + + source_step_with_filter->addFilter(filter_dag_ptr, filter_columns.front()); + } + } } - // else if (!view) - // { - // /// For view storage, we need to rewrite the `modified_query_info.view_query` to optimize read. - // /// The most intuitive way is to use InterpreterSelectQuery. - - // /// Intercept the settings - // modified_context->setSetting("max_threads", streams_num); - // modified_context->setSetting("max_streams_to_max_threads_ratio", 1); - // modified_context->setSetting("max_block_size", max_block_size); - - // LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "creating InterpreterSelectQuery 1.0"); - // InterpreterSelectQuery interpreter(modified_query_info.query, - // modified_context, - // storage, - // storage->getInMemoryMetadataPtr(), // view->getInMemoryMetadataPtr(), - // // SelectQueryOptions(/* processed_stage*/)); - // SelectQueryOptions(processed_stage)); - // // SelectQueryOptions(QueryProcessingStage::WithMergeableState)); - // interpreter.buildQueryPlan(plan); - // } else { /// For view storage, we need to rewrite the `modified_query_info.view_query` to optimize read. 
@@ -725,90 +724,25 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( modified_context->setSetting("max_streams_to_max_threads_ratio", 1); modified_context->setSetting("max_block_size", max_block_size); - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "creating InterpreterSelectQuery 1.5"); InterpreterSelectQuery interpreter(modified_query_info.query, modified_context, storage, view->getInMemoryMetadataPtr(), SelectQueryOptions(processed_stage)); interpreter.buildQueryPlan(plan); - } - if (!plan.isInitialized()) - return {}; + if (!plan.isInitialized()) + return {}; + } if (auto * read_from_merge_tree = typeid_cast(plan.getRootNode()->step.get())) { size_t filters_dags_size = filter_dags.size(); - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "ReadFromMergeTree detected, DAG size {}", filters_dags_size); for (size_t i = 0; i < filters_dags_size; ++i) { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "adding filter {}", filter_dags[i]->dumpDAG()); read_from_merge_tree->addFilter(filter_dags[i], filter_nodes.nodes[i]); } } - if (auto * source_step_with_filter = dynamic_cast((plan.getRootNode()->step.get()))) - { - auto row_policy_filter = modified_context->getRowPolicyFilter(database_name, table_name, RowPolicyFilterType::SELECT_FILTER); - - if (row_policy_filter) - { - // row_policy_filter->expression - // auto pipe_columns = builder.getHeader().getNamesAndTypesList(); - ASTPtr expr = row_policy_filter->expression; - - // auto * select_ast = expr /* query_ast */ ->as(); - // assert(select_ast); - - // select_ast->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared()); - // auto expr_list = select_ast->select(); - // expr_list->children.push_back(expr); - // String filter_column_name = expr_list->children.at(0)->getColumnName(); - // LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_column_name: {} ", filter_column_name); - - auto syntax_result = TreeRewriter(modified_context).analyze(expr, header/*builder->getHeader().*/.getNamesAndTypesList() /* pipe_columns*/); - // auto syntax_result = TreeRewriter(local_context).analyze(expr, NamesAndTypesList()); - auto expression_analyzer = ExpressionAnalyzer{row_policy_filter->expression, syntax_result, modified_context}; - - auto filter_dag_ptr = expression_analyzer.getActionsDAG(true, false); - - - auto filter_actions = std::make_shared(filter_dag_ptr, ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); - auto required_columns = filter_actions->getRequiredColumns(); - - for (const auto & req_col : required_columns) - { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "req_col: {}", req_col); - } - - - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions_dag: {},<> {}, <> {}", - filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); - - - auto fa_actions_columns_sorted = filter_actions->getSampleBlock().getNames(); - std::sort(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end()); - - Names required_columns_sorted = required_columns; - std::sort(required_columns_sorted.begin(), required_columns_sorted.end()); - - Names filter_columns; - - - std::set_difference(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end(), - required_columns.begin(), required_columns.end(), - std::inserter(filter_columns, filter_columns.begin())); - - - 
LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "SourceStepWithFilter detected"); - auto found_column = filter_dag_ptr->tryFindInOutputs(filter_columns.front()); - assert(found_column); - - // LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "found column {}", found_column->dumpDAG()); - - source_step_with_filter->addFilter(/* filter_actions */ filter_dag_ptr, filter_columns.front()); - } - } builder = plan.buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(modified_context), BuildQueryPipelineSettings::fromContext(modified_context)); @@ -834,7 +768,6 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( { modified_select.replaceDatabaseAndTable(database_name, table_name); /// TODO: Find a way to support projections for StorageMerge - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "creating InterpreterSelectQuery 2"); InterpreterSelectQuery interpreter{modified_query_info.query, modified_context, SelectQueryOptions(processed_stage).ignoreProjections()}; @@ -1114,16 +1047,6 @@ void ReadFromMerge::convertingSourceStream( convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position; - for (const auto & column_with_type_and_name : builder.getHeader().getColumnsWithTypeAndName()) - { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "column name: {} (builder.getHeader().getColumnsWithTypeAndName())", column_with_type_and_name.name); - } - - for (const auto & column_with_type_and_name : header.getColumnsWithTypeAndName()) - { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertingSourceStream"), "column name: {} (header.getColumnsWithTypeAndName())", column_with_type_and_name.name); - } - auto convert_actions_dag = ActionsDAG::makeConvertingActions(builder.getHeader().getColumnsWithTypeAndName(), header.getColumnsWithTypeAndName(), convert_actions_match_columns_mode); @@ -1147,36 +1070,14 @@ void ReadFromMerge::convertingSourceStream( if (row_policy_filter) { - // row_policy_filter->expression - // auto pipe_columns = builder.getHeader().getNamesAndTypesList(); - - ASTPtr expr = row_policy_filter->expression; - // auto * select_ast = expr /* query_ast */ ->as(); - // assert(select_ast); - - // select_ast->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared()); - // auto expr_list = select_ast->select(); - // expr_list->children.push_back(expr); - // String filter_column_name = expr_list->children.at(0)->getColumnName(); - // LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_column_name: {} ", filter_column_name); - auto syntax_result = TreeRewriter(local_context).analyze(expr, pipe_columns); - // auto syntax_result = TreeRewriter(local_context).analyze(expr, NamesAndTypesList()); auto expression_analyzer = ExpressionAnalyzer{row_policy_filter->expression, syntax_result, local_context}; auto actions_dag = expression_analyzer.getActionsDAG(true, false); - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "actions_dag: {},<> {}", actions_dag->dumpNames(), actions_dag->dumpDAG()); - - auto filter_actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); auto required_columns = filter_actions->getRequiredColumns(); - for (const auto & req_col : required_columns) - { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "req_col: {}", req_col); - } - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions_dag: {},<> {}, <> {}", 
filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); @@ -1195,32 +1096,11 @@ void ReadFromMerge::convertingSourceStream( required_columns.begin(), required_columns.end(), std::inserter(filter_columns, filter_columns.begin())); - for (const auto & filter_column : filter_columns) - { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_column: {}", filter_column); - } - - // Block block; - // block = filter_actions->getActionsDAG().updateHeader(std::move(block)); - // LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertingSourceStream"), "block from updateHeader {}", block.dumpStructure()); - builder.addSimpleTransform([&](const Block & stream_header) { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "stream_header {}", stream_header.dumpStructure()); return std::make_shared(stream_header, filter_actions, filter_columns.front(), true /* remove fake column */); }); - - - // auto row_level_filter_step = std::make_unique( - // query_plan.getCurrentDataStream(), - // expressions.prewhere_info->row_level_filter, - // expressions.prewhere_info->row_level_column_name, - // true); - - // row_level_filter_step->setStepDescription("Row-level security filter (PREWHERE)"); - // query_plan.addStep(std::move(row_level_filter_step)); } - } } From e2ddf40cfd6fde71cc5ef3075f2e8190dd408353 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Wed, 14 Jun 2023 21:36:20 +0000 Subject: [PATCH 0070/1097] merge_row_policy: further cleanup --- src/Storages/StorageMerge.cpp | 72 ++++++++++++++--------------------- 1 file changed, 29 insertions(+), 43 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 60ab9f86a2a..483f592f819 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -297,7 +297,6 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage( } return selected_table_size == 1 ? stage_in_source_tables : std::min(stage_in_source_tables, QueryProcessingStage::WithMergeableState); - // return QueryProcessingStage::Complete; } void StorageMerge::read( @@ -314,7 +313,6 @@ void StorageMerge::read( * since there is no certainty that it works when one of table is MergeTree and other is not. */ auto modified_context = Context::createCopy(local_context); - // modified_context->setSetting("optimize_move_to_prewhere", false); bool has_database_virtual_column = false; bool has_table_virtual_column = false; @@ -328,9 +326,7 @@ void StorageMerge::read( else if (column_name == "_table" && isVirtualColumn(column_name, storage_snapshot->metadata)) has_table_virtual_column = true; else - { real_column_names.push_back(column_name); - } } StorageListWithLocks selected_tables @@ -359,7 +355,6 @@ void StorageMerge::read( query_plan.addInterpreterContext(modified_context); /// What will be result structure depending on query processed stage in source tables? 
- // Block common_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, QueryProcessingStage::Complete /* processed_stage */); Block common_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, processed_stage); auto step = std::make_unique( @@ -664,7 +659,6 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( QueryPlan & plan = child_plans.emplace_back(); StorageView * view = dynamic_cast(storage.get()); - if (!view || allow_experimental_analyzer) { storage->read(plan, @@ -1046,7 +1040,6 @@ void ReadFromMerge::convertingSourceStream( if (local_context->getSettingsRef().allow_experimental_analyzer && processed_stage != QueryProcessingStage::FetchColumns) convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position; - auto convert_actions_dag = ActionsDAG::makeConvertingActions(builder.getHeader().getColumnsWithTypeAndName(), header.getColumnsWithTypeAndName(), convert_actions_match_columns_mode); @@ -1060,47 +1053,40 @@ void ReadFromMerge::convertingSourceStream( }); - bool explicit_row_policy_filter_needed = true; + auto row_policy_filter = local_context->getRowPolicyFilter(database_name, table_name, RowPolicyFilterType::SELECT_FILTER); - if (explicit_row_policy_filter_needed) + if (row_policy_filter) { + ASTPtr expr = row_policy_filter->expression; - auto row_policy_filter = local_context->getRowPolicyFilter(database_name, table_name, RowPolicyFilterType::SELECT_FILTER); + auto syntax_result = TreeRewriter(local_context).analyze(expr, pipe_columns); + auto expression_analyzer = ExpressionAnalyzer{row_policy_filter->expression, syntax_result, local_context}; - if (row_policy_filter) + auto actions_dag = expression_analyzer.getActionsDAG(true, false); + auto filter_actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); + auto required_columns = filter_actions->getRequiredColumns(); + + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions_dag: {},<> {}, <> {}", + filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); + + + auto fa_actions_columns_sorted = filter_actions->getSampleBlock().getNames(); + std::sort(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end()); + + Names required_columns_sorted = required_columns; + std::sort(required_columns_sorted.begin(), required_columns_sorted.end()); + + Names filter_columns; + + + std::set_difference(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end(), + required_columns.begin(), required_columns.end(), + std::inserter(filter_columns, filter_columns.begin())); + + builder.addSimpleTransform([&](const Block & stream_header) { - - ASTPtr expr = row_policy_filter->expression; - - auto syntax_result = TreeRewriter(local_context).analyze(expr, pipe_columns); - auto expression_analyzer = ExpressionAnalyzer{row_policy_filter->expression, syntax_result, local_context}; - - auto actions_dag = expression_analyzer.getActionsDAG(true, false); - auto filter_actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - auto required_columns = filter_actions->getRequiredColumns(); - - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions_dag: {},<> {}, <> {}", - filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), 
filter_actions->getSampleBlock().dumpStructure()); - - - auto fa_actions_columns_sorted = filter_actions->getSampleBlock().getNames(); - std::sort(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end()); - - Names required_columns_sorted = required_columns; - std::sort(required_columns_sorted.begin(), required_columns_sorted.end()); - - Names filter_columns; - - - std::set_difference(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end(), - required_columns.begin(), required_columns.end(), - std::inserter(filter_columns, filter_columns.begin())); - - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header, filter_actions, filter_columns.front(), true /* remove fake column */); - }); - } + return std::make_shared(stream_header, filter_actions, filter_columns.front(), true /* remove fake column */); + }); } } From a447b3f9e0576b76895c8840da17191d68531f58 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Sun, 25 Jun 2023 21:22:58 +0000 Subject: [PATCH 0071/1097] merge_row_policy: more tests --- .../02763_row_policy_storage_merge.reference | 254 ++++++++++-------- .../02763_row_policy_storage_merge.sql.j2 | 44 ++- 2 files changed, 182 insertions(+), 116 deletions(-) diff --git a/tests/queries/0_stateless/02763_row_policy_storage_merge.reference b/tests/queries/0_stateless/02763_row_policy_storage_merge.reference index 9dcc5f449ab..444513c6c20 100644 --- a/tests/queries/0_stateless/02763_row_policy_storage_merge.reference +++ b/tests/queries/0_stateless/02763_row_policy_storage_merge.reference @@ -1,138 +1,180 @@ -1 -1 -1 -1 -2 -2 -2 -2 -3 -3 -3 -3 -4 -4 -4 -4 -1 -2 -3 -4 +1 11 +1 11 +1 11 +1 11 +2 12 +2 12 +2 12 +2 12 +3 13 +3 13 +3 13 +3 13 +4 14 +4 14 +4 14 +4 14 +1 11 +2 12 +3 13 +4 14 SETTINGS optimize_move_to_prewhere= 0 SELECT * FROM 02763_merge_log_1 -3 +3 13 SELECT * FROM merge(currentDatabase(), 02763_merge_log_1) -3 +3 13 SELECT * FROM merge(currentDatabase(), 02763_merge_log) -1 -2 -3 -3 -4 +1 11 +2 12 +3 13 +3 13 +4 14 SELECT * FROM merge(currentDatabase(), 02763_merge_log) WHERE x>2 -3 -3 -4 +3 13 +3 13 +4 14 SELECT * FROM 02763_merge_merge_1 -4 -SELECT * FROM merge(currentDatabase(), 02763_merge_merge_1 -4 +4 14 +SELECT * FROM merge(currentDatabase(), 02763_merge_merge_1) +4 14 SELECT * FROM merge(currentDatabase(), 02763_merge_merge) -1 -2 -3 -4 -4 +1 11 +2 12 +3 13 +4 14 +4 14 SELECT * FROM merge(currentDatabase(), 02763_merge_merge) WHERE x>2 -3 -4 -4 +3 13 +4 14 +4 14 SELECT * FROM merge(currentDatabase(), 02763_merge) -1 -1 -2 -2 -3 -3 -3 -4 -4 -4 +1 11 +1 11 +2 12 +2 12 +3 13 +3 13 +3 13 +4 14 +4 14 +4 14 SELECT * FROM merge(currentDatabase(), 02763_merge) WHERE x>2 +3 13 +3 13 +3 13 +4 14 +4 14 +4 14 +aaa 6 39 +aaa 6 39 +aaa 6 39 +aaa 8 42 +aaa 8 42 +aaa 8 42 3 3 3 4 4 4 -aaa 6 -aaa 6 -aaa 6 -aaa 8 -aaa 8 -aaa 8 -3 -3 -3 -4 -4 -4 +SELECT * FROM merge(...) LEFT JOIN merge(...) +3 13 13 +3 13 13 +4 14 14 +4 14 14 +SELECT * FROM merge(...) UNION ALL SELECT * FROM merge(...) +1 11 +1 11 +2 12 +2 12 +3 13 +3 13 +3 13 +4 14 +4 14 +4 14 +SELECT x, SUM(x) FROM (SELECT * FROM merge(...) UNION ALL ...) 
GROUP BY x +1 22 +2 24 +3 39 +4 42 SETTINGS optimize_move_to_prewhere= 1 SELECT * FROM 02763_merge_log_1 -3 +3 13 SELECT * FROM merge(currentDatabase(), 02763_merge_log_1) -3 +3 13 SELECT * FROM merge(currentDatabase(), 02763_merge_log) -1 -2 -3 -3 -4 +1 11 +2 12 +3 13 +3 13 +4 14 SELECT * FROM merge(currentDatabase(), 02763_merge_log) WHERE x>2 -3 -3 -4 +3 13 +3 13 +4 14 SELECT * FROM 02763_merge_merge_1 -4 -SELECT * FROM merge(currentDatabase(), 02763_merge_merge_1 -4 +4 14 +SELECT * FROM merge(currentDatabase(), 02763_merge_merge_1) +4 14 SELECT * FROM merge(currentDatabase(), 02763_merge_merge) -1 -2 -3 -4 -4 +1 11 +2 12 +3 13 +4 14 +4 14 SELECT * FROM merge(currentDatabase(), 02763_merge_merge) WHERE x>2 -3 -4 -4 +3 13 +4 14 +4 14 SELECT * FROM merge(currentDatabase(), 02763_merge) -1 -1 -2 -2 -3 -3 -3 -4 -4 -4 +1 11 +1 11 +2 12 +2 12 +3 13 +3 13 +3 13 +4 14 +4 14 +4 14 SELECT * FROM merge(currentDatabase(), 02763_merge) WHERE x>2 +3 13 +3 13 +3 13 +4 14 +4 14 +4 14 +aaa 6 39 +aaa 6 39 +aaa 6 39 +aaa 8 42 +aaa 8 42 +aaa 8 42 3 3 3 4 4 4 -aaa 6 -aaa 6 -aaa 6 -aaa 8 -aaa 8 -aaa 8 -3 -3 -3 -4 -4 -4 +SELECT * FROM merge(...) LEFT JOIN merge(...) +3 13 13 +3 13 13 +4 14 14 +4 14 14 +SELECT * FROM merge(...) UNION ALL SELECT * FROM merge(...) +1 11 +1 11 +2 12 +2 12 +3 13 +3 13 +3 13 +4 14 +4 14 +4 14 +SELECT x, SUM(x) FROM (SELECT * FROM merge(...) UNION ALL ...) GROUP BY x +1 22 +2 24 +3 39 +4 42 diff --git a/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 b/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 index 33b02275d4a..b5094f927f4 100644 --- a/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 +++ b/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 @@ -6,16 +6,16 @@ DROP ROW POLICY IF EXISTS 02763_filter_1 ON 02763_merge_log_1; DROP ROW POLICY IF EXISTS 02763_filter_2 ON 02763_merge_merge_1; -CREATE TABLE 02763_merge_log_1 (x UInt8) ENGINE = Log; -CREATE TABLE 02763_merge_log_2 (x UInt8) ENGINE = Log; +CREATE TABLE 02763_merge_log_1 (x UInt8, y UInt64) ENGINE = Log; +CREATE TABLE 02763_merge_log_2 (x UInt8, y UInt64) ENGINE = Log; -CREATE TABLE 02763_merge_merge_1 (x UInt8) ENGINE = MergeTree ORDER BY x; -CREATE TABLE 02763_merge_merge_2 (x UInt8) ENGINE = MergeTree ORDER BY x; +CREATE TABLE 02763_merge_merge_1 (x UInt8, y UInt64) ENGINE = MergeTree ORDER BY x; +CREATE TABLE 02763_merge_merge_2 (x UInt8, y UInt64) ENGINE = MergeTree ORDER BY x; -INSERT INTO 02763_merge_log_1 VALUES (1), (2), (3), (4); -INSERT INTO 02763_merge_log_2 VALUES (1), (2), (3), (4); -INSERT INTO 02763_merge_merge_1 VALUES (1), (2), (3), (4); -INSERT INTO 02763_merge_merge_2 VALUES (1), (2), (3), (4); +INSERT INTO 02763_merge_log_1 VALUES (1, 11), (2, 12), (3, 13), (4, 14); +INSERT INTO 02763_merge_log_2 VALUES (1, 11), (2, 12), (3, 13), (4, 14); +INSERT INTO 02763_merge_merge_1 VALUES (1, 11), (2, 12), (3, 13), (4, 14); +INSERT INTO 02763_merge_merge_2 VALUES (1, 11), (2, 12), (3, 13), (4, 14); SELECT * FROM merge(currentDatabase(), '02763_merge') ORDER BY x; @@ -41,7 +41,7 @@ CREATE ROW POLICY 02763_filter_2 ON 02763_merge_merge_1 USING x=4 AS permissive SELECT 'SELECT * FROM 02763_merge_merge_1'; SELECT * FROM 02763_merge_merge_1 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; -SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge_merge_1'; +SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge_merge_1)'; SELECT * FROM merge(currentDatabase(), '02763_merge_merge_1') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; SELECT 
'SELECT * FROM merge(currentDatabase(), 02763_merge_merge)'; SELECT * FROM merge(currentDatabase(), '02763_merge_merge') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; @@ -54,9 +54,33 @@ SELECT * FROM merge(currentDatabase(), '02763_merge') ORDER BY x SETTINGS optimi SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge) WHERE x>2'; SELECT * FROM merge(currentDatabase(), '02763_merge') WHERE x>2 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; -SELECT 'aaa', x*2 as x_2 FROM merge(currentDatabase(), '02763_merge') WHERE x>2 ORDER BY x_2 SETTINGS optimize_move_to_prewhere= {{prew}}; +SELECT 'aaa', x*2 as x_2, y*3 as y_3 FROM merge(currentDatabase(), '02763_merge') WHERE x>2 ORDER BY x_2 SETTINGS optimize_move_to_prewhere= {{prew}}; SELECT x FROM (SELECT * FROM merge(currentDatabase(), '02763_merge') WHERE x IN (3,4)) ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; +SELECT 'SELECT * FROM merge(...) LEFT JOIN merge(...)'; +SELECT * FROM merge(currentDatabase(), '02763_merge.*1') as a +LEFT JOIN +merge(currentDatabase(), '02763_merge.*2') as b +USING (x) +ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; + +SELECT 'SELECT * FROM merge(...) UNION ALL SELECT * FROM merge(...)'; +SELECT * FROM +( +SELECT * FROM merge(currentDatabase(), '02763_merge.*1') +UNION ALL +SELECT * FROM merge(currentDatabase(), '02763_merge.*2') +) +ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; + +SELECT 'SELECT x, SUM(x) FROM (SELECT * FROM merge(...) UNION ALL ...) GROUP BY x'; +SELECT x, SUM(y) FROM +(SELECT * FROM merge(currentDatabase(), '02763_merge.*1') +UNION ALL +SELECT * FROM merge(currentDatabase(), '02763_merge.*2')) +GROUP BY x +ORDER BY x; + DROP ROW POLICY 02763_filter_1 ON 02763_merge_log_1; DROP ROW POLICY 02763_filter_2 ON 02763_merge_merge_1; From f4c77c1f1827c639aefc8e12cac78080b8f380cc Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Tue, 11 Jul 2023 11:46:37 +0000 Subject: [PATCH 0072/1097] merge_row_policy: namesDifference, try to handle nonselected columns --- src/Storages/StorageMerge.cpp | 75 ++++++++++++------- .../02763_row_policy_storage_merge.reference | 54 ++++++++++++- .../02763_row_policy_storage_merge.sql.j2 | 23 +++++- 3 files changed, 118 insertions(+), 34 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 483f592f819..ce1fdece231 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -66,6 +66,26 @@ bool columnDefaultKindHasSameType(ColumnDefaultKind lhs, ColumnDefaultKind rhs) return false; } +std::string namesDifference(Names && outer_set, Names && inner_set) +{ + std::sort(outer_set.begin(), outer_set.end()); + + std::sort(inner_set.begin(), inner_set.end()); + + Names result; + + std::set_difference(outer_set.begin(), outer_set.end(), + inner_set.begin(), inner_set.end(), std::inserter(result, result.begin())); + + if (result.size() != 1) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot determine row level filter"); + } + + return result.front(); +} + } namespace DB @@ -682,29 +702,22 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( { ASTPtr expr = row_policy_filter->expression; - auto syntax_result = TreeRewriter(modified_context).analyze(expr, header.getNamesAndTypesList()); + auto storage_metadata_snapshot = storage->getInMemoryMetadataPtr(); + auto storage_columns = storage_metadata_snapshot->getColumns(); + auto needed_columns = storage_columns.getAllPhysical(); // header.getNamesAndTypesList() + + auto syntax_result = 
TreeRewriter(modified_context).analyze(expr, needed_columns); auto expression_analyzer = ExpressionAnalyzer{row_policy_filter->expression, syntax_result, modified_context}; - auto filter_dag_ptr = expression_analyzer.getActionsDAG(true, false); - auto filter_actions = std::make_shared(filter_dag_ptr, ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); + auto actions_dag = expression_analyzer.getActionsDAG(true, false); + auto filter_actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); auto required_columns = filter_actions->getRequiredColumns(); - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions_dag: {},<> {}, <> {}", + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertingSourceStream"), "filter_actions_dag: {},<> {}, <> {}", filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); - auto fa_actions_columns_sorted = filter_actions->getSampleBlock().getNames(); - std::sort(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end()); - - Names required_columns_sorted = required_columns; - std::sort(required_columns_sorted.begin(), required_columns_sorted.end()); - - Names filter_columns; - - std::set_difference(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end(), - required_columns.begin(), required_columns.end(), - std::inserter(filter_columns, filter_columns.begin())); - - source_step_with_filter->addFilter(filter_dag_ptr, filter_columns.front()); + auto filter_column_name = namesDifference(filter_actions->getSampleBlock().getNames(), filter_actions->getRequiredColumns()); + source_step_with_filter->addFilter(actions_dag, filter_column_name); } } } @@ -1059,33 +1072,39 @@ void ReadFromMerge::convertingSourceStream( { ASTPtr expr = row_policy_filter->expression; - auto syntax_result = TreeRewriter(local_context).analyze(expr, pipe_columns); + auto storage_columns = metadata_snapshot->getColumns(); + auto needed_columns = storage_columns.getAllPhysical(); // header.getNamesAndTypesList() + + + auto syntax_result = TreeRewriter(local_context).analyze(expr, needed_columns /* pipe_columns */); auto expression_analyzer = ExpressionAnalyzer{row_policy_filter->expression, syntax_result, local_context}; auto actions_dag = expression_analyzer.getActionsDAG(true, false); auto filter_actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - auto required_columns = filter_actions->getRequiredColumns(); LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions_dag: {},<> {}, <> {}", filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); - auto fa_actions_columns_sorted = filter_actions->getSampleBlock().getNames(); - std::sort(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end()); - Names required_columns_sorted = required_columns; - std::sort(required_columns_sorted.begin(), required_columns_sorted.end()); + for (auto & colname : filter_actions->getSampleBlock().getNames()) + { + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions->getSampleBlock().getNames(): {}", colname); + } - Names filter_columns; + for (auto & colname : filter_actions->getRequiredColumns()) + { + 
LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions->getRequiredColumns(): {}", colname); + } - std::set_difference(fa_actions_columns_sorted.begin(), fa_actions_columns_sorted.end(), - required_columns.begin(), required_columns.end(), - std::inserter(filter_columns, filter_columns.begin())); + + auto filter_column_name = namesDifference(filter_actions->getSampleBlock().getNames(), filter_actions->getRequiredColumns()); + builder.addSimpleTransform([&](const Block & stream_header) { - return std::make_shared(stream_header, filter_actions, filter_columns.front(), true /* remove fake column */); + return std::make_shared(stream_header, filter_actions, filter_column_name, true /* remove fake column */); }); } } diff --git a/tests/queries/0_stateless/02763_row_policy_storage_merge.reference b/tests/queries/0_stateless/02763_row_policy_storage_merge.reference index 444513c6c20..55890a11783 100644 --- a/tests/queries/0_stateless/02763_row_policy_storage_merge.reference +++ b/tests/queries/0_stateless/02763_row_policy_storage_merge.reference @@ -1,3 +1,9 @@ +SELECT * FROM 02763_merge_log_1 ORDER BY x +1 11 +2 12 +3 13 +4 14 +SELECT * FROM merge(currentDatabase(), 02763_merge) ORDER BY x 1 11 1 11 1 11 @@ -14,10 +20,6 @@ 4 14 4 14 4 14 -1 11 -2 12 -3 13 -4 14 SETTINGS optimize_move_to_prewhere= 0 SELECT * FROM 02763_merge_log_1 3 13 @@ -98,6 +100,28 @@ SELECT x, SUM(x) FROM (SELECT * FROM merge(...) UNION ALL ...) GROUP BY x 2 24 3 39 4 42 +1 11 0 +2 12 0 +3 13 0 +4 14 1 +4 14 1 +SELECT * FROM merge(currentDatabase(), 02763_merge_log) WHERE x>1 -- with y>12 +2 12 +3 13 +3 13 +4 14 +4 14 +SELECT * FROM merge(currentDatabase(), 02763_merge_merge) WHERE x>1 -- with y>12 +2 12 +3 13 +3 13 +4 14 +4 14 +2 12 0 +3 13 1 +3 13 1 +4 14 1 +4 14 1 SETTINGS optimize_move_to_prewhere= 1 SELECT * FROM 02763_merge_log_1 3 13 @@ -178,3 +202,25 @@ SELECT x, SUM(x) FROM (SELECT * FROM merge(...) UNION ALL ...) 
GROUP BY x 2 24 3 39 4 42 +1 11 0 +2 12 0 +3 13 0 +4 14 1 +4 14 1 +SELECT * FROM merge(currentDatabase(), 02763_merge_log) WHERE x>1 -- with y>12 +2 12 +3 13 +3 13 +4 14 +4 14 +SELECT * FROM merge(currentDatabase(), 02763_merge_merge) WHERE x>1 -- with y>12 +2 12 +3 13 +3 13 +4 14 +4 14 +2 12 0 +3 13 1 +3 13 1 +4 14 1 +4 14 1 diff --git a/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 b/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 index b5094f927f4..94872dfd7b6 100644 --- a/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 +++ b/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 @@ -17,10 +17,12 @@ INSERT INTO 02763_merge_log_2 VALUES (1, 11), (2, 12), (3, 13), (4, 14); INSERT INTO 02763_merge_merge_1 VALUES (1, 11), (2, 12), (3, 13), (4, 14); INSERT INTO 02763_merge_merge_2 VALUES (1, 11), (2, 12), (3, 13), (4, 14); -SELECT * FROM merge(currentDatabase(), '02763_merge') ORDER BY x; - +SELECT 'SELECT * FROM 02763_merge_log_1 ORDER BY x'; SELECT * FROM 02763_merge_log_1 ORDER BY x; +SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge) ORDER BY x'; +SELECT * FROM merge(currentDatabase(), '02763_merge') ORDER BY x; + {% for prew in [0 , 1] -%} @@ -81,7 +83,24 @@ SELECT * FROM merge(currentDatabase(), '02763_merge.*2')) GROUP BY x ORDER BY x; +SELECT *, x=4 FROM merge(currentDatabase(), '02763_merge_merge') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; + + +CREATE ROW POLICY 02763_filter_3 ON 02763_merge_log_1 USING y>12 AS permissive TO ALL; +SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge_log) WHERE x>1 -- with y>12'; +SELECT * FROM merge(currentDatabase(), '02763_merge_log') WHERE x>1 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; + +CREATE ROW POLICY 02763_filter_4 ON 02763_merge_merge_1 USING y>12 AS permissive TO ALL; +SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge_merge) WHERE x>1 -- with y>12'; +SELECT * FROM merge(currentDatabase(), '02763_merge_merge') WHERE x>1 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; + +SELECT *, (x=4 OR y>12) FROM merge(currentDatabase(), '02763_merge_merge') WHERE x>1 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; + + DROP ROW POLICY 02763_filter_1 ON 02763_merge_log_1; DROP ROW POLICY 02763_filter_2 ON 02763_merge_merge_1; +DROP ROW POLICY 02763_filter_3 ON 02763_merge_log_1; +DROP ROW POLICY 02763_filter_4 ON 02763_merge_merge_1; + {% endfor %} From b57d8bc4a91eddcc41469cc21f983237797fa272 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Tue, 11 Jul 2023 21:16:54 +0000 Subject: [PATCH 0073/1097] merge_row_policy: works again with adding missed columns --- src/Storages/StorageMerge.cpp | 119 +++++++++++++++++++++------------- 1 file changed, 73 insertions(+), 46 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index ce1fdece231..920604c876a 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -681,6 +682,29 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( StorageView * view = dynamic_cast(storage.get()); if (!view || allow_experimental_analyzer) { + + auto row_policy_filter = modified_context->getRowPolicyFilter(database_name, table_name, RowPolicyFilterType::SELECT_FILTER); + if (row_policy_filter) + { + ASTPtr expr = row_policy_filter->expression; + + RequiredSourceColumnsVisitor::Data columns_context; + RequiredSourceColumnsVisitor(columns_context).visit(expr); + 
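Immediately above is the fix this commit's subject refers to: when a row policy exists, every column its expression references must also be requested from the source table, even if the user's SELECT did not mention it. A compilable distillation of that column-extension logic, standard library only (the policy_columns list stands in for what RequiredSourceColumnsVisitor collects from the expression AST):

#include <algorithm>
#include <string>
#include <vector>

using Names = std::vector<std::string>;

// Ensure every column needed by the row policy is present in the requested names,
// mirroring the sort + binary_search + push_back pattern in the hunk above.
void extendNames(Names & names, const Names & policy_columns)
{
    for (const auto & required : policy_columns)
    {
        std::sort(names.begin(), names.end());
        if (!std::binary_search(names.begin(), names.end(), required))
            names.push_back(required);
    }
}

// Example: SELECT x FROM merge(...) under a policy "y > 12" must also read y:
//   Names columns{"x"};
//   extendNames(columns, {"y"});   // columns now contains "x" and "y"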
+ auto req_columns = columns_context.requiredColumns(); + for (const auto & req_column : req_columns) + { + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "req.column: {}", req_column); + std::sort(real_column_names.begin(), real_column_names.end()); + + if (!std::binary_search(real_column_names.begin(), real_column_names.end(), req_column)) + { + real_column_names.push_back(req_column); + } + } + } + + storage->read(plan, real_column_names, storage_snapshot, @@ -694,13 +718,12 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( if (!plan.isInitialized()) return {}; - if (auto * source_step_with_filter = dynamic_cast((plan.getRootNode()->step.get()))) - { - auto row_policy_filter = modified_context->getRowPolicyFilter(database_name, table_name, RowPolicyFilterType::SELECT_FILTER); - if (row_policy_filter) + if (row_policy_filter) + { + ASTPtr expr = row_policy_filter->expression; + if (auto * source_step_with_filter = dynamic_cast((plan.getRootNode()->step.get()))) { - ASTPtr expr = row_policy_filter->expression; auto storage_metadata_snapshot = storage->getInMemoryMetadataPtr(); auto storage_columns = storage_metadata_snapshot->getColumns(); @@ -713,12 +736,13 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( auto filter_actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); auto required_columns = filter_actions->getRequiredColumns(); - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertingSourceStream"), "filter_actions_dag: {},<> {}, <> {}", + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "filter_actions_dag: {},<> {}, <> {}", filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); auto filter_column_name = namesDifference(filter_actions->getSampleBlock().getNames(), filter_actions->getRequiredColumns()); source_step_with_filter->addFilter(actions_dag, filter_column_name); } + } } else @@ -1026,46 +1050,6 @@ void ReadFromMerge::convertingSourceStream( const String & database_name, const String & table_name) { - Block before_block_header = builder.getHeader(); - - auto storage_sample_block = metadata_snapshot->getSampleBlock(); - auto pipe_columns = builder.getHeader().getNamesAndTypesList(); - - for (const auto & alias : aliases) - { - pipe_columns.emplace_back(NameAndTypePair(alias.name, alias.type)); - ASTPtr expr = alias.expression; - auto syntax_result = TreeRewriter(local_context).analyze(expr, pipe_columns); - auto expression_analyzer = ExpressionAnalyzer{alias.expression, syntax_result, local_context}; - - auto dag = std::make_shared(pipe_columns); - auto actions_dag = expression_analyzer.getActionsDAG(true, false); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header, actions); - }); - } - - ActionsDAG::MatchColumnsMode convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Name; - - if (local_context->getSettingsRef().allow_experimental_analyzer && processed_stage != QueryProcessingStage::FetchColumns) - convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position; - - auto convert_actions_dag = ActionsDAG::makeConvertingActions(builder.getHeader().getColumnsWithTypeAndName(), - header.getColumnsWithTypeAndName(), - convert_actions_match_columns_mode); - auto actions = 
std::make_shared( - std::move(convert_actions_dag), - ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header, actions); - }); - - auto row_policy_filter = local_context->getRowPolicyFilter(database_name, table_name, RowPolicyFilterType::SELECT_FILTER); if (row_policy_filter) @@ -1107,6 +1091,49 @@ void ReadFromMerge::convertingSourceStream( return std::make_shared(stream_header, filter_actions, filter_column_name, true /* remove fake column */); }); } + + + + Block before_block_header = builder.getHeader(); + + auto storage_sample_block = metadata_snapshot->getSampleBlock(); + auto pipe_columns = builder.getHeader().getNamesAndTypesList(); + + for (const auto & alias : aliases) + { + pipe_columns.emplace_back(NameAndTypePair(alias.name, alias.type)); + ASTPtr expr = alias.expression; + auto syntax_result = TreeRewriter(local_context).analyze(expr, pipe_columns); + auto expression_analyzer = ExpressionAnalyzer{alias.expression, syntax_result, local_context}; + + auto dag = std::make_shared(pipe_columns); + auto actions_dag = expression_analyzer.getActionsDAG(true, false); + auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); + + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(stream_header, actions); + }); + } + + ActionsDAG::MatchColumnsMode convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Name; + + if (local_context->getSettingsRef().allow_experimental_analyzer && processed_stage != QueryProcessingStage::FetchColumns) + convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position; + + auto convert_actions_dag = ActionsDAG::makeConvertingActions(builder.getHeader().getColumnsWithTypeAndName(), + header.getColumnsWithTypeAndName(), + convert_actions_match_columns_mode); + auto actions = std::make_shared( + std::move(convert_actions_dag), + ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); + + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(stream_header, actions); + }); + + } bool ReadFromMerge::requestReadingInOrder(InputOrderInfoPtr order_info_) From b39a201cd0ea81adcbe97943913d961e80cca088 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Wed, 12 Jul 2023 14:42:32 +0000 Subject: [PATCH 0074/1097] merge_row_policy: refactored with ReadFromMerge::RowPolicyData --- src/Storages/StorageMerge.cpp | 246 ++++++++++-------- src/Storages/StorageMerge.h | 5 +- .../02763_row_policy_storage_merge.reference | 26 ++ .../02763_row_policy_storage_merge.sql.j2 | 3 +- 4 files changed, 173 insertions(+), 107 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 920604c876a..0b6968f78ce 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -67,26 +67,6 @@ bool columnDefaultKindHasSameType(ColumnDefaultKind lhs, ColumnDefaultKind rhs) return false; } -std::string namesDifference(Names && outer_set, Names && inner_set) -{ - std::sort(outer_set.begin(), outer_set.end()); - - std::sort(inner_set.begin(), inner_set.end()); - - Names result; - - std::set_difference(outer_set.begin(), outer_set.end(), - inner_set.begin(), inner_set.end(), std::inserter(result, result.begin())); - - if (result.size() != 1) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Cannot determine row level filter"); - } 
- - return result.front(); -} - } namespace DB @@ -395,6 +375,28 @@ void StorageMerge::read( query_plan.addStep(std::move(step)); } +class ReadFromMerge::RowPolicyData +{ +public: + bool needCare() + { + return static_cast(row_policy_filter_ptr); + } + void init(RowPolicyFilterPtr, + const std::shared_ptr, + ContextPtr); + void extendNames(Names &); + void addStorageFilter(SourceStepWithFilter *); + void addFilterTransform(QueryPipelineBuilder &); +private: + static std::string namesDifference(Names && outer_set, Names && inner_set); + RowPolicyFilterPtr row_policy_filter_ptr; + std::string filter_column_name; + ActionsDAGPtr actions_dag; + ExpressionActionsPtr filter_actions; +}; + + ReadFromMerge::ReadFromMerge( Block common_header_, StorageListWithLocks selected_tables_, @@ -671,6 +673,8 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( storage_snapshot, modified_query_info); + RowPolicyData row_policy_data; + if (processed_stage <= storage_stage || (allow_experimental_analyzer && processed_stage == QueryProcessingStage::FetchColumns)) { /// If there are only virtual columns in query, you must request at least one other column. @@ -682,29 +686,15 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( StorageView * view = dynamic_cast(storage.get()); if (!view || allow_experimental_analyzer) { + row_policy_data.init(modified_context->getRowPolicyFilter(database_name, table_name, RowPolicyFilterType::SELECT_FILTER), + storage, + modified_context); - auto row_policy_filter = modified_context->getRowPolicyFilter(database_name, table_name, RowPolicyFilterType::SELECT_FILTER); - if (row_policy_filter) + if (row_policy_data.needCare()) { - ASTPtr expr = row_policy_filter->expression; - - RequiredSourceColumnsVisitor::Data columns_context; - RequiredSourceColumnsVisitor(columns_context).visit(expr); - - auto req_columns = columns_context.requiredColumns(); - for (const auto & req_column : req_columns) - { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "req.column: {}", req_column); - std::sort(real_column_names.begin(), real_column_names.end()); - - if (!std::binary_search(real_column_names.begin(), real_column_names.end(), req_column)) - { - real_column_names.push_back(req_column); - } - } + row_policy_data.extendNames(real_column_names); } - storage->read(plan, real_column_names, storage_snapshot, @@ -714,35 +704,16 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( max_block_size, UInt32(streams_num)); - if (!plan.isInitialized()) return {}; - if (row_policy_filter) + if (row_policy_data.needCare()) { - ASTPtr expr = row_policy_filter->expression; if (auto * source_step_with_filter = dynamic_cast((plan.getRootNode()->step.get()))) { - - auto storage_metadata_snapshot = storage->getInMemoryMetadataPtr(); - auto storage_columns = storage_metadata_snapshot->getColumns(); - auto needed_columns = storage_columns.getAllPhysical(); // header.getNamesAndTypesList() - - auto syntax_result = TreeRewriter(modified_context).analyze(expr, needed_columns); - auto expression_analyzer = ExpressionAnalyzer{row_policy_filter->expression, syntax_result, modified_context}; - - auto actions_dag = expression_analyzer.getActionsDAG(true, false); - auto filter_actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); - auto required_columns = filter_actions->getRequiredColumns(); - - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "filter_actions_dag: {},<> {}, <> {}", - 
filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); - - auto filter_column_name = namesDifference(filter_actions->getSampleBlock().getNames(), filter_actions->getRequiredColumns()); - source_step_with_filter->addFilter(actions_dag, filter_column_name); + row_policy_data.addStorageFilter(source_step_with_filter); } - } } else @@ -864,12 +835,119 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( /// Subordinary tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. - convertingSourceStream(header, storage_snapshot->metadata, aliases, modified_context, *builder, processed_stage, database_name, table_name); + convertingSourceStream(header, + storage_snapshot->metadata, + aliases, + modified_context, + *builder, + processed_stage, + row_policy_data); } return builder; } +void ReadFromMerge::RowPolicyData::init(RowPolicyFilterPtr row_policy_filter_ptr_, + const std::shared_ptr storage, + ContextPtr local_context) +{ + + if (row_policy_filter_ptr_) + { + row_policy_filter_ptr = row_policy_filter_ptr_; + + ASTPtr expr = row_policy_filter_ptr->expression; + + auto storage_metadata_snapshot = storage->getInMemoryMetadataPtr(); + auto storage_columns = storage_metadata_snapshot->getColumns(); + auto needed_columns = storage_columns.getAllPhysical(); // header.getNamesAndTypesList() + + + auto syntax_result = TreeRewriter(local_context).analyze(expr, needed_columns /* pipe_columns */); + auto expression_analyzer = ExpressionAnalyzer{expr, syntax_result, local_context}; + + actions_dag = expression_analyzer.getActionsDAG(true, false); + filter_actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); + filter_column_name = namesDifference(filter_actions->getSampleBlock().getNames(), filter_actions->getRequiredColumns()); + } + +} + +void ReadFromMerge::RowPolicyData::extendNames(Names & names) +{ + assert(row_policy_filter_ptr); + ASTPtr expr = row_policy_filter_ptr->expression; + + RequiredSourceColumnsVisitor::Data columns_context; + RequiredSourceColumnsVisitor(columns_context).visit(expr); + + auto req_columns = columns_context.requiredColumns(); + for (const auto & req_column : req_columns) + { + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::RowPolicyData::extendNames"), "req.column: {}", req_column); + std::sort(names.begin(), names.end()); + + if (!std::binary_search(names.begin(), names.end(), req_column)) + { + names.push_back(req_column); + } + } +} + +void ReadFromMerge::RowPolicyData::addStorageFilter(SourceStepWithFilter * step) +{ + assert(row_policy_filter_ptr); + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "filter_actions_dag: {},<> {}, <> {}", + filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); + step->addFilter(actions_dag, filter_column_name); +} + +void ReadFromMerge::RowPolicyData::addFilterTransform(QueryPipelineBuilder & builder) +{ + assert(row_policy_filter_ptr); + + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions_dag: {},<> {}, <> {}", + filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); + + + for (auto & colname : filter_actions->getSampleBlock().getNames()) + { + 
LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertingSourceStream"), "filter_actions->getSampleBlock().getNames(): {}", colname); + } + + for (auto & colname : filter_actions->getRequiredColumns()) + { + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertingSourceStream"), "filter_actions->getRequiredColumns(): {}", colname); + } + + // auto filter_column_name = namesDifference(filter_actions->getSampleBlock().getNames(), filter_actions->getRequiredColumns()); + + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(stream_header, filter_actions, filter_column_name, true /* remove fake column */); + }); +} + +std::string ReadFromMerge::RowPolicyData::namesDifference(Names && outer_set, Names && inner_set) +{ + std::sort(outer_set.begin(), outer_set.end()); + + std::sort(inner_set.begin(), inner_set.end()); + + Names result; + + std::set_difference(outer_set.begin(), outer_set.end(), + inner_set.begin(), inner_set.end(), std::inserter(result, result.begin())); + + if (result.size() != 1) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot determine row level filter"); + } + + return result.front(); +} + StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( ContextPtr query_context, const ASTPtr & query /* = nullptr */, @@ -1047,53 +1125,13 @@ void ReadFromMerge::convertingSourceStream( ContextPtr local_context, QueryPipelineBuilder & builder, const QueryProcessingStage::Enum & processed_stage, - const String & database_name, - const String & table_name) + RowPolicyData & row_policy_data) { - auto row_policy_filter = local_context->getRowPolicyFilter(database_name, table_name, RowPolicyFilterType::SELECT_FILTER); - - if (row_policy_filter) + if (row_policy_data.needCare()) { - ASTPtr expr = row_policy_filter->expression; - - auto storage_columns = metadata_snapshot->getColumns(); - auto needed_columns = storage_columns.getAllPhysical(); // header.getNamesAndTypesList() - - - auto syntax_result = TreeRewriter(local_context).analyze(expr, needed_columns /* pipe_columns */); - auto expression_analyzer = ExpressionAnalyzer{row_policy_filter->expression, syntax_result, local_context}; - - auto actions_dag = expression_analyzer.getActionsDAG(true, false); - auto filter_actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions_dag: {},<> {}, <> {}", - filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); - - - - for (auto & colname : filter_actions->getSampleBlock().getNames()) - { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions->getSampleBlock().getNames(): {}", colname); - } - - for (auto & colname : filter_actions->getRequiredColumns()) - { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions->getRequiredColumns(): {}", colname); - } - - - - auto filter_column_name = namesDifference(filter_actions->getSampleBlock().getNames(), filter_actions->getRequiredColumns()); - - - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header, filter_actions, filter_column_name, true /* remove fake column */); - }); + row_policy_data.addFilterTransform(builder); } - - Block before_block_header = builder.getHeader(); auto storage_sample_block = metadata_snapshot->getSampleBlock(); diff --git 
a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index fbe6dcec298..4dc7cb77c48 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -177,6 +177,8 @@ private: using Aliases = std::vector; + class RowPolicyData; + static SelectQueryInfo getModifiedQueryInfo(const SelectQueryInfo & query_info, const ContextPtr & modified_context, const StorageWithLockAndName & storage_with_lock_and_name, @@ -202,8 +204,7 @@ private: ContextPtr context, QueryPipelineBuilder & builder, const QueryProcessingStage::Enum & processed_stage, - const String & database_name, - const String & table_name); + RowPolicyData & row_policy_data); }; } diff --git a/tests/queries/0_stateless/02763_row_policy_storage_merge.reference b/tests/queries/0_stateless/02763_row_policy_storage_merge.reference index 55890a11783..070520bb392 100644 --- a/tests/queries/0_stateless/02763_row_policy_storage_merge.reference +++ b/tests/queries/0_stateless/02763_row_policy_storage_merge.reference @@ -122,6 +122,19 @@ SELECT * FROM merge(currentDatabase(), 02763_merge_merge) WHERE x>1 -- with y>12 3 13 1 4 14 1 4 14 1 +SELECT y from merge(currentDatabase(), 02763_merge) +11 +11 +12 +12 +13 +13 +13 +13 +14 +14 +14 +14 SETTINGS optimize_move_to_prewhere= 1 SELECT * FROM 02763_merge_log_1 3 13 @@ -224,3 +237,16 @@ SELECT * FROM merge(currentDatabase(), 02763_merge_merge) WHERE x>1 -- with y>12 3 13 1 4 14 1 4 14 1 +SELECT y from merge(currentDatabase(), 02763_merge) +11 +11 +12 +12 +13 +13 +13 +13 +14 +14 +14 +14 diff --git a/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 b/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 index 94872dfd7b6..3030be2539f 100644 --- a/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 +++ b/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 @@ -85,7 +85,6 @@ ORDER BY x; SELECT *, x=4 FROM merge(currentDatabase(), '02763_merge_merge') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; - CREATE ROW POLICY 02763_filter_3 ON 02763_merge_log_1 USING y>12 AS permissive TO ALL; SELECT 'SELECT * FROM merge(currentDatabase(), 02763_merge_log) WHERE x>1 -- with y>12'; SELECT * FROM merge(currentDatabase(), '02763_merge_log') WHERE x>1 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; @@ -96,6 +95,8 @@ SELECT * FROM merge(currentDatabase(), '02763_merge_merge') WHERE x>1 ORDER BY x SELECT *, (x=4 OR y>12) FROM merge(currentDatabase(), '02763_merge_merge') WHERE x>1 ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; +SELECT 'SELECT y from merge(currentDatabase(), 02763_merge)'; +SELECT y from merge(currentDatabase(), '02763_merge') ORDER BY y SETTINGS optimize_move_to_prewhere= {{prew}}; DROP ROW POLICY 02763_filter_1 ON 02763_merge_log_1; DROP ROW POLICY 02763_filter_2 ON 02763_merge_merge_1; From f89a538f314f01ef8eeb056afb0c6d4b381e89c7 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Wed, 12 Jul 2023 15:11:59 +0000 Subject: [PATCH 0075/1097] merge_row_policy: stylecheck --- src/Storages/StorageMerge.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 0b6968f78ce..76c7afc5b73 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -937,7 +937,7 @@ std::string ReadFromMerge::RowPolicyData::namesDifference(Names && outer_set, Na Names result; std::set_difference(outer_set.begin(), outer_set.end(), - inner_set.begin(), inner_set.end(), std::inserter(result, result.begin())); + inner_set.begin(), 
inner_set.end(), std::inserter(result, result.begin())); if (result.size() != 1) { From 6161116bf2ce1b271fa327181d5017e637b25424 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Wed, 12 Jul 2023 15:52:48 +0000 Subject: [PATCH 0076/1097] merge_row_policy: stylecheck again --- src/Storages/StorageMerge.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 76c7afc5b73..934a72684d2 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -81,6 +81,7 @@ namespace ErrorCodes extern const int SAMPLING_NOT_SUPPORTED; extern const int ALTER_OF_COLUMN_IS_FORBIDDEN; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int LOGICAL_ERROR; } StorageMerge::StorageMerge( From 57f1e5627658f71e5e5dcfec17dadd5afeeaeac0 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Fri, 14 Jul 2023 13:39:43 +0000 Subject: [PATCH 0077/1097] merge_row_policy: cleanup, comments, new tests --- src/Storages/StorageMerge.cpp | 78 ++++++++++--------- .../02763_row_policy_storage_merge.reference | 54 +++++++++++++ .../02763_row_policy_storage_merge.sql.j2 | 25 ++++++ 3 files changed, 121 insertions(+), 36 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 934a72684d2..df353fa3158 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -268,7 +268,6 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage( /// (see removeJoin()) /// /// And for this we need to return FetchColumns. - if (const auto * select = query_info.query->as(); select && hasJoin(*select)) return QueryProcessingStage::FetchColumns; @@ -315,6 +314,7 @@ void StorageMerge::read( * since there is no certainty that it works when one of table is MergeTree and other is not. 
*/ auto modified_context = Context::createCopy(local_context); + modified_context->setSetting("optimize_move_to_prewhere", false); bool has_database_virtual_column = false; bool has_table_virtual_column = false; @@ -376,9 +376,13 @@ void StorageMerge::read( query_plan.addStep(std::move(step)); } +/// A transient object of this helper class is created +/// when processing a Merge table data source (subordinary table) +/// to guarantee that row policies are applied class ReadFromMerge::RowPolicyData { public: + /// Row policy requires extra filtering bool needCare() { return static_cast(row_policy_filter_ptr); @@ -386,18 +390,28 @@ public: void init(RowPolicyFilterPtr, const std::shared_ptr, ContextPtr); + + /// Add columns that needed for row policies to data stream + /// SELECT x from T if T has row policy y=42 + /// required y in data pipeline void extendNames(Names &); + + /// Use storage facilities to filter data + /// does not guarantee accuracy, but reduce number of rows void addStorageFilter(SourceStepWithFilter *); + + /// Create explicit filter transform to stop + /// rows that are not conform to row level policy void addFilterTransform(QueryPipelineBuilder &); + private: static std::string namesDifference(Names && outer_set, Names && inner_set); RowPolicyFilterPtr row_policy_filter_ptr; - std::string filter_column_name; + std::string filter_column_name; // complex filer, may contain logic operations ActionsDAGPtr actions_dag; ExpressionActionsPtr filter_actions; }; - ReadFromMerge::ReadFromMerge( Block common_header_, StorageListWithLocks selected_tables_, @@ -687,7 +701,11 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( StorageView * view = dynamic_cast(storage.get()); if (!view || allow_experimental_analyzer) { - row_policy_data.init(modified_context->getRowPolicyFilter(database_name, table_name, RowPolicyFilterType::SELECT_FILTER), + row_policy_data.init( + modified_context->getRowPolicyFilter( + database_name, + table_name, + RowPolicyFilterType::SELECT_FILTER), storage, modified_context); @@ -708,7 +726,6 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( if (!plan.isInitialized()) return {}; - if (row_policy_data.needCare()) { if (auto * source_step_with_filter = dynamic_cast((plan.getRootNode()->step.get()))) @@ -742,14 +759,12 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( { size_t filters_dags_size = filter_dags.size(); for (size_t i = 0; i < filters_dags_size; ++i) - { read_from_merge_tree->addFilter(filter_dags[i], filter_nodes.nodes[i]); - } } + builder = plan.buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(modified_context), BuildQueryPipelineSettings::fromContext(modified_context)); - } else if (processed_stage > storage_stage || (allow_experimental_analyzer && processed_stage != QueryProcessingStage::FetchColumns)) { @@ -835,6 +850,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( /// Subordinary tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. + /// Besides this we add FilterTransform if it is needed to follow row level policies. 
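The filter_column_name kept by RowPolicyData is found by its namesDifference() helper: it is the single output of the filter ExpressionActions (their sample block) that is not among their required input columns. A self-contained version of that helper with an illustrative call (the generated expression-column name in the example is only indicative):

#include <algorithm>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <vector>

using Names = std::vector<std::string>;

// The row-policy filter column is the one name produced by the filter actions
// that they do not also consume as input.
std::string namesDifference(Names outer_set, Names inner_set)
{
    std::sort(outer_set.begin(), outer_set.end());
    std::sort(inner_set.begin(), inner_set.end());

    Names result;
    std::set_difference(outer_set.begin(), outer_set.end(),
                        inner_set.begin(), inner_set.end(),
                        std::back_inserter(result));

    if (result.size() != 1)
        throw std::runtime_error("Cannot determine row level filter");

    return result.front();
}

int main()
{
    // For a policy like "y > 12": outputs of the expression actions vs. required inputs.
    std::cout << namesDifference({"greater(y, 12)", "y"}, {"y"}) << '\n';   // greater(y, 12)
}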
convertingSourceStream(header, storage_snapshot->metadata, @@ -852,7 +868,6 @@ void ReadFromMerge::RowPolicyData::init(RowPolicyFilterPtr row_policy_filter_ptr const std::shared_ptr storage, ContextPtr local_context) { - if (row_policy_filter_ptr_) { row_policy_filter_ptr = row_policy_filter_ptr_; @@ -861,19 +876,20 @@ void ReadFromMerge::RowPolicyData::init(RowPolicyFilterPtr row_policy_filter_ptr auto storage_metadata_snapshot = storage->getInMemoryMetadataPtr(); auto storage_columns = storage_metadata_snapshot->getColumns(); - auto needed_columns = storage_columns.getAllPhysical(); // header.getNamesAndTypesList() + auto needed_columns = storage_columns.getAllPhysical(); - - auto syntax_result = TreeRewriter(local_context).analyze(expr, needed_columns /* pipe_columns */); + auto syntax_result = TreeRewriter(local_context).analyze(expr, needed_columns); auto expression_analyzer = ExpressionAnalyzer{expr, syntax_result, local_context}; actions_dag = expression_analyzer.getActionsDAG(true, false); - filter_actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); + filter_actions = std::make_shared(actions_dag, + ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); filter_column_name = namesDifference(filter_actions->getSampleBlock().getNames(), filter_actions->getRequiredColumns()); } - } +// Add columns that needed to evaluate row policies +// SELECT x from t if t has row policy void ReadFromMerge::RowPolicyData::extendNames(Names & names) { assert(row_policy_filter_ptr); @@ -885,7 +901,6 @@ void ReadFromMerge::RowPolicyData::extendNames(Names & names) auto req_columns = columns_context.requiredColumns(); for (const auto & req_column : req_columns) { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::RowPolicyData::extendNames"), "req.column: {}", req_column); std::sort(names.begin(), names.end()); if (!std::binary_search(names.begin(), names.end(), req_column)) @@ -898,30 +913,22 @@ void ReadFromMerge::RowPolicyData::extendNames(Names & names) void ReadFromMerge::RowPolicyData::addStorageFilter(SourceStepWithFilter * step) { assert(row_policy_filter_ptr); - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::createSources"), "filter_actions_dag: {},<> {}, <> {}", - filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); - step->addFilter(actions_dag, filter_column_name); + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::RowPolicyData::addStorageFilter"), "filter_actions_dag: {},<> {}, <> {}", + filter_actions->getActionsDAG().dumpNames(), + filter_actions->getActionsDAG().dumpDAG(), + filter_actions->getSampleBlock().dumpStructure()); + + step->addFilter(actions_dag, filter_column_name); } void ReadFromMerge::RowPolicyData::addFilterTransform(QueryPipelineBuilder & builder) { assert(row_policy_filter_ptr); - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertinfSourceStream"), "filter_actions_dag: {},<> {}, <> {}", - filter_actions->getActionsDAG().dumpNames(), filter_actions->getActionsDAG().dumpDAG(), filter_actions->getSampleBlock().dumpStructure()); - - - for (auto & colname : filter_actions->getSampleBlock().getNames()) - { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertingSourceStream"), "filter_actions->getSampleBlock().getNames(): {}", colname); - } - - for (auto & colname : filter_actions->getRequiredColumns()) - { - LOG_TRACE(&Poco::Logger::get("ReadFromMerge::convertingSourceStream"), 
"filter_actions->getRequiredColumns(): {}", colname); - } - - // auto filter_column_name = namesDifference(filter_actions->getSampleBlock().getNames(), filter_actions->getRequiredColumns()); + LOG_TRACE(&Poco::Logger::get("ReadFromMerge::RowPolicyData::addFilterTransform"), "filter_actions_dag: {},<> {}, <> {}", + filter_actions->getActionsDAG().dumpNames(), + filter_actions->getActionsDAG().dumpDAG(), + filter_actions->getSampleBlock().dumpStructure()); builder.addSimpleTransform([&](const Block & stream_header) { @@ -929,10 +936,10 @@ void ReadFromMerge::RowPolicyData::addFilterTransform(QueryPipelineBuilder & bui }); } +/// Find out an item that in outer_set vector, but not in inner_set vector std::string ReadFromMerge::RowPolicyData::namesDifference(Names && outer_set, Names && inner_set) { std::sort(outer_set.begin(), outer_set.end()); - std::sort(inner_set.begin(), inner_set.end()); Names result; @@ -949,6 +956,7 @@ std::string ReadFromMerge::RowPolicyData::namesDifference(Names && outer_set, Na return result.front(); } + StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( ContextPtr query_context, const ASTPtr & query /* = nullptr */, @@ -1171,8 +1179,6 @@ void ReadFromMerge::convertingSourceStream( { return std::make_shared(stream_header, actions); }); - - } bool ReadFromMerge::requestReadingInOrder(InputOrderInfoPtr order_info_) diff --git a/tests/queries/0_stateless/02763_row_policy_storage_merge.reference b/tests/queries/0_stateless/02763_row_policy_storage_merge.reference index 070520bb392..9c91a1652c7 100644 --- a/tests/queries/0_stateless/02763_row_policy_storage_merge.reference +++ b/tests/queries/0_stateless/02763_row_policy_storage_merge.reference @@ -135,6 +135,33 @@ SELECT y from merge(currentDatabase(), 02763_merge) 14 14 14 +02763_merge_fancycols +SELECT * +SELECT x, lc +SELECT * +1 11 111 111 42 +1 11 111 111 42 +SELECT x, lc +1 111 +1 111 +SELECT x, lc, cnst +1 111 42 +1 111 42 +SELECT x, y from merge(currentDatabase(), 02763_merge +1 11 +1 11 +1 11 +1 11 +2 12 +2 12 +3 13 +3 13 +3 13 +3 13 +4 14 +4 14 +4 14 +4 14 SETTINGS optimize_move_to_prewhere= 1 SELECT * FROM 02763_merge_log_1 3 13 @@ -250,3 +277,30 @@ SELECT y from merge(currentDatabase(), 02763_merge) 14 14 14 +02763_merge_fancycols +SELECT * +SELECT x, lc +SELECT * +1 11 111 111 42 +1 11 111 111 42 +SELECT x, lc +1 111 +1 111 +SELECT x, lc, cnst +1 111 42 +1 111 42 +SELECT x, y from merge(currentDatabase(), 02763_merge +1 11 +1 11 +1 11 +1 11 +2 12 +2 12 +3 13 +3 13 +3 13 +3 13 +4 14 +4 14 +4 14 +4 14 diff --git a/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 b/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 index 3030be2539f..ea2cf1fef00 100644 --- a/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 +++ b/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 @@ -98,10 +98,35 @@ SELECT *, (x=4 OR y>12) FROM merge(currentDatabase(), '02763_merge_merge') WHERE SELECT 'SELECT y from merge(currentDatabase(), 02763_merge)'; SELECT y from merge(currentDatabase(), '02763_merge') ORDER BY y SETTINGS optimize_move_to_prewhere= {{prew}}; +SELECT '02763_merge_fancycols'; +CREATE OR REPLACE TABLE 02763_merge_fancycols (x UInt8, y Nullable(UInt64), z String DEFAULT CONCAT(toString(x), toString(y)), lc LowCardinality(String) DEFAULT z, cnst UInt32 MATERIALIZED 42) ENGINE = MergeTree() ORDER BY tuple(); +INSERT INTO 02763_merge_fancycols (x, y) SELECT x, y from merge(currentDatabase(), '02763_merge'); + +CREATE ROW POLICY 02763_filter_5 ON 
02763_merge_fancycols USING cnst<>42 AS permissive TO ALL; +SELECT 'SELECT *'; +SELECT * from merge(currentDatabase(), '02763_merge_fancycols') ORDER BY x; +SELECT 'SELECT x, lc'; +SELECT x, lc from merge(currentDatabase(), '02763_merge_fancycols') ORDER BY x; + +CREATE ROW POLICY 02763_filter_6 ON 02763_merge_fancycols USING lc='111' AS permissive TO ALL; +SELECT 'SELECT *'; +SELECT * from merge(currentDatabase(), '02763_merge_fancycols') ORDER BY x; +SELECT 'SELECT x, lc'; +SELECT x, lc from merge(currentDatabase(), '02763_merge_fancycols') ORDER BY x; +SELECT 'SELECT x, lc, cnst'; +SELECT x, lc, cnst from merge(currentDatabase(), '02763_merge_fancycols') ORDER BY x; +SELECT 'SELECT x, y from merge(currentDatabase(), 02763_merge'; +SELECT x, y from merge(currentDatabase(), '02763_merge') ORDER BY x; + +DROP TABLE 02763_merge_fancycols; + DROP ROW POLICY 02763_filter_1 ON 02763_merge_log_1; DROP ROW POLICY 02763_filter_2 ON 02763_merge_merge_1; DROP ROW POLICY 02763_filter_3 ON 02763_merge_log_1; DROP ROW POLICY 02763_filter_4 ON 02763_merge_merge_1; +DROP ROW POLICY 02763_filter_5 ON 02763_merge_fancycols; +DROP ROW POLICY 02763_filter_6 ON 02763_merge_fancycols; + {% endfor %} From 9a561cff8258a878271d289a3d74f1a014d2419b Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Mon, 17 Jul 2023 12:10:27 +0000 Subject: [PATCH 0078/1097] merge_row_policy: cleanup, verbose short messages in 00002_log ... --- src/Storages/StorageMerge.cpp | 13 ++++++------- src/Storages/StorageMerge.h | 4 ++-- .../00002_log_and_exception_messages_formatting.sql | 3 ++- .../02763_row_policy_storage_merge.sql.j2 | 7 ++++++- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index df353fa3158..5bbc6c9a03c 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -387,9 +387,8 @@ public: { return static_cast(row_policy_filter_ptr); } - void init(RowPolicyFilterPtr, - const std::shared_ptr, - ContextPtr); + + void init(RowPolicyFilterPtr, std::shared_ptr, ContextPtr); /// Add columns that needed for row policies to data stream /// SELECT x from T if T has row policy y=42 @@ -660,8 +659,8 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const SelectQueryInfo & quer QueryPipelineBuilderPtr ReadFromMerge::createSources( const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & modified_query_info, - const QueryProcessingStage::Enum & processed_stage, - const UInt64 max_block_size, + QueryProcessingStage::Enum processed_stage, + UInt64 max_block_size, const Block & header, const Aliases & aliases, const StorageWithLockAndName & storage_with_lock, @@ -865,7 +864,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( } void ReadFromMerge::RowPolicyData::init(RowPolicyFilterPtr row_policy_filter_ptr_, - const std::shared_ptr storage, + std::shared_ptr storage, ContextPtr local_context) { if (row_policy_filter_ptr_) @@ -1133,7 +1132,7 @@ void ReadFromMerge::convertingSourceStream( const Aliases & aliases, ContextPtr local_context, QueryPipelineBuilder & builder, - const QueryProcessingStage::Enum & processed_stage, + QueryProcessingStage::Enum processed_stage, RowPolicyData & row_policy_data) { if (row_policy_data.needCare()) diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 4dc7cb77c48..1a5aca24e4c 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -187,7 +187,7 @@ private: QueryPipelineBuilderPtr createSources( const StorageSnapshotPtr & storage_snapshot, 
SelectQueryInfo & query_info, - const QueryProcessingStage::Enum & processed_stage, + QueryProcessingStage::Enum processed_stage, UInt64 max_block_size, const Block & header, const Aliases & aliases, @@ -203,7 +203,7 @@ private: const Aliases & aliases, ContextPtr context, QueryPipelineBuilder & builder, - const QueryProcessingStage::Enum & processed_stage, + QueryProcessingStage::Enum processed_stage, RowPolicyData & row_policy_data); }; diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index f4ec9b79a4c..19b68873a10 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -52,7 +52,8 @@ create temporary table known_short_messages (s String) as select * from (select select 'messages shorter than 10', max2(countDistinctOrDefault(message_format_string), 1) from logs where length(message_format_string) < 10 and message_format_string not in known_short_messages; -- Same as above. Feel free to update the threshold or remove this query if really necessary -select 'messages shorter than 16', max2(countDistinctOrDefault(message_format_string), 3) from logs where length(message_format_string) < 16 and message_format_string not in known_short_messages; +-- select 'messages shorter than 16', max2(countDistinctOrDefault(message_format_string), 3) from logs where length(message_format_string) < 16 and message_format_string not in known_short_messages; +select message_format_string from logs where length(message_format_string) < 16 and message_format_string not in known_short_messages; -- Unlike above, here we look at length of the formatted message, not format string. Most short format strings are fine because they end up decorated with context from outer or inner exceptions, e.g.: -- "Expected end of line" -> "Code: 117. 
DB::Exception: Expected end of line: (in file/uri /var/lib/clickhouse/user_files/data_02118): (at row 1)" diff --git a/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 b/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 index ea2cf1fef00..9b3197cc34f 100644 --- a/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 +++ b/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 @@ -2,8 +2,13 @@ DROP TABLE IF EXISTS 02763_merge_log_1; DROP TABLE IF EXISTS 02763_merge_log_2; DROP TABLE IF EXISTS 02763_merge_merge_1; DROP TABLE IF EXISTS 02763_merge_merge_2; +DROP TABLE IF EXISTS 02763_merge_fancycols; DROP ROW POLICY IF EXISTS 02763_filter_1 ON 02763_merge_log_1; DROP ROW POLICY IF EXISTS 02763_filter_2 ON 02763_merge_merge_1; +DROP ROW POLICY IF EXISTS 02763_filter_3 ON 02763_merge_log_1; +DROP ROW POLICY IF EXISTS 02763_filter_4 ON 02763_merge_merge_1; +DROP ROW POLICY IF EXISTS 02763_filter_5 ON 02763_merge_fancycols; +DROP ROW POLICY IF EXISTS 02763_filter_6 ON 02763_merge_fancycols; CREATE TABLE 02763_merge_log_1 (x UInt8, y UInt64) ENGINE = Log; @@ -99,7 +104,7 @@ SELECT 'SELECT y from merge(currentDatabase(), 02763_merge)'; SELECT y from merge(currentDatabase(), '02763_merge') ORDER BY y SETTINGS optimize_move_to_prewhere= {{prew}}; SELECT '02763_merge_fancycols'; -CREATE OR REPLACE TABLE 02763_merge_fancycols (x UInt8, y Nullable(UInt64), z String DEFAULT CONCAT(toString(x), toString(y)), lc LowCardinality(String) DEFAULT z, cnst UInt32 MATERIALIZED 42) ENGINE = MergeTree() ORDER BY tuple(); +CREATE TABLE 02763_merge_fancycols (x UInt8, y Nullable(UInt64), z String DEFAULT CONCAT(toString(x), toString(y)), lc LowCardinality(String) DEFAULT z, cnst UInt32 MATERIALIZED 42) ENGINE = MergeTree() ORDER BY tuple(); INSERT INTO 02763_merge_fancycols (x, y) SELECT x, y from merge(currentDatabase(), '02763_merge'); CREATE ROW POLICY 02763_filter_5 ON 02763_merge_fancycols USING cnst<>42 AS permissive TO ALL; From da3419068d5da01f5f9daca1b7e00ce8a55685a8 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Wed, 19 Jul 2023 21:32:52 +0000 Subject: [PATCH 0079/1097] merge_row_policy: more verbosity for 00002_log_and_exception_... --- .../00002_log_and_exception_messages_formatting.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index 19b68873a10..30814b791ed 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -53,7 +53,8 @@ select 'messages shorter than 10', max2(countDistinctOrDefault(message_format_st -- Same as above. 
Feel free to update the threshold or remove this query if really necessary -- select 'messages shorter than 16', max2(countDistinctOrDefault(message_format_string), 3) from logs where length(message_format_string) < 16 and message_format_string not in known_short_messages; -select message_format_string from logs where length(message_format_string) < 16 and message_format_string not in known_short_messages; +select 'from logs', message_format_string from logs where length(message_format_string) < 16 and message_format_string not in known_short_messages; +select 'from system.text_log', message_format_string from system.text_log where length(message_format_string) < 16 and message_format_string not in known_short_messages and now() - toIntervalMinute(120) < event_time; -- Unlike above, here we look at length of the formatted message, not format string. Most short format strings are fine because they end up decorated with context from outer or inner exceptions, e.g.: -- "Expected end of line" -> "Code: 117. DB::Exception: Expected end of line: (in file/uri /var/lib/clickhouse/user_files/data_02118): (at row 1)" From ea48679185c1de20ea492c9e5afaf0d873b7f051 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Mon, 11 Sep 2023 15:07:10 +0000 Subject: [PATCH 0080/1097] merge_row_policy: revert debug, 00002_log... test is fixed in master --- .../00002_log_and_exception_messages_formatting.sql | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index 30814b791ed..f4ec9b79a4c 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -52,9 +52,7 @@ create temporary table known_short_messages (s String) as select * from (select select 'messages shorter than 10', max2(countDistinctOrDefault(message_format_string), 1) from logs where length(message_format_string) < 10 and message_format_string not in known_short_messages; -- Same as above. Feel free to update the threshold or remove this query if really necessary --- select 'messages shorter than 16', max2(countDistinctOrDefault(message_format_string), 3) from logs where length(message_format_string) < 16 and message_format_string not in known_short_messages; -select 'from logs', message_format_string from logs where length(message_format_string) < 16 and message_format_string not in known_short_messages; -select 'from system.text_log', message_format_string from system.text_log where length(message_format_string) < 16 and message_format_string not in known_short_messages and now() - toIntervalMinute(120) < event_time; +select 'messages shorter than 16', max2(countDistinctOrDefault(message_format_string), 3) from logs where length(message_format_string) < 16 and message_format_string not in known_short_messages; -- Unlike above, here we look at length of the formatted message, not format string. Most short format strings are fine because they end up decorated with context from outer or inner exceptions, e.g.: -- "Expected end of line" -> "Code: 117. 
DB::Exception: Expected end of line: (in file/uri /var/lib/clickhouse/user_files/data_02118): (at row 1)" From 3480243fa3b07a2df8638da527b8ef4fa9a378e9 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Wed, 4 Oct 2023 08:31:12 +0000 Subject: [PATCH 0081/1097] merge_row_policy: per code review --- src/Storages/StorageMerge.cpp | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 5bbc6c9a03c..354e17ad5ba 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -383,7 +383,7 @@ class ReadFromMerge::RowPolicyData { public: /// Row policy requires extra filtering - bool needCare() + bool hasRowPolicy() { return static_cast(row_policy_filter_ptr); } @@ -708,7 +708,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( storage, modified_context); - if (row_policy_data.needCare()) + if (row_policy_data.hasRowPolicy()) { row_policy_data.extendNames(real_column_names); } @@ -725,7 +725,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( if (!plan.isInitialized()) return {}; - if (row_policy_data.needCare()) + if (row_policy_data.hasRowPolicy()) { if (auto * source_step_with_filter = dynamic_cast((plan.getRootNode()->step.get()))) { @@ -880,7 +880,7 @@ void ReadFromMerge::RowPolicyData::init(RowPolicyFilterPtr row_policy_filter_ptr auto syntax_result = TreeRewriter(local_context).analyze(expr, needed_columns); auto expression_analyzer = ExpressionAnalyzer{expr, syntax_result, local_context}; - actions_dag = expression_analyzer.getActionsDAG(true, false); + actions_dag = expression_analyzer.getActionsDAG(false /* add_aliases */, false /* project_result */); filter_actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); filter_column_name = namesDifference(filter_actions->getSampleBlock().getNames(), filter_actions->getRequiredColumns()); @@ -897,16 +897,22 @@ void ReadFromMerge::RowPolicyData::extendNames(Names & names) RequiredSourceColumnsVisitor::Data columns_context; RequiredSourceColumnsVisitor(columns_context).visit(expr); - auto req_columns = columns_context.requiredColumns(); + const auto req_columns = columns_context.requiredColumns(); + + std::sort(names.begin(), names.end()); + NameSet added_names; + for (const auto & req_column : req_columns) { - std::sort(names.begin(), names.end()); - if (!std::binary_search(names.begin(), names.end(), req_column)) { - names.push_back(req_column); + added_names.insert(req_column); } } + if (!added_names.empty()) + { + std::copy(added_names.begin(), added_names.end(), std::back_inserter(names)); + } } void ReadFromMerge::RowPolicyData::addStorageFilter(SourceStepWithFilter * step) @@ -931,7 +937,7 @@ void ReadFromMerge::RowPolicyData::addFilterTransform(QueryPipelineBuilder & bui builder.addSimpleTransform([&](const Block & stream_header) { - return std::make_shared(stream_header, filter_actions, filter_column_name, true /* remove fake column */); + return std::make_shared(stream_header, filter_actions, filter_column_name, true /* remove filter column */); }); } @@ -1135,7 +1141,7 @@ void ReadFromMerge::convertingSourceStream( QueryProcessingStage::Enum processed_stage, RowPolicyData & row_policy_data) { - if (row_policy_data.needCare()) + if (row_policy_data.hasRowPolicy()) { row_policy_data.addFilterTransform(builder); } From c7a3c74cde8d6dd68b23fb318827d0eaadf8f292 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 7 Oct 2023 15:20:31 +0200 
Subject: [PATCH 0082/1097] Better --- src/Core/Settings.h | 2 +- src/Interpreters/Cache/FileCache.cpp | 68 ++++++++++++++++++++------ src/Interpreters/Cache/FileCache.h | 3 +- src/Interpreters/Cache/FileCache_fwd.h | 2 +- 4 files changed, 58 insertions(+), 17 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b9aa678a3c5..485b96b2341 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -719,7 +719,7 @@ class IColumn; M(Bool, skip_download_if_exceeds_query_cache, true, "Skip download from remote filesystem if exceeds query cache size", 0) \ M(UInt64, filesystem_cache_max_download_size, (128UL * 1024 * 1024 * 1024), "Max remote filesystem cache size that can be downloaded by a single query", 0) \ M(Bool, throw_on_error_from_cache_on_write_operations, false, "Ignore error from cache when caching on write operations (INSERT, merges)", 0) \ - M(UInt64, filesystem_cache_getorset_batch_size, 100, "A batch size for holding file segments for a single read range", 0) \ + M(UInt64, filesystem_cache_getorset_batch_size, 20, "A batch size for holding file segments for a single read range", 0) \ \ M(Bool, load_marks_asynchronously, false, "Load MergeTree marks asynchronously", 0) \ M(Bool, enable_filesystem_read_prefetches_log, false, "Log to system.filesystem prefetch_log during query. Should be used only for testing or debugging, not recommended to be turned on by default", 0) \ diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index bef1f3086df..576aab31adc 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -147,7 +147,7 @@ CacheGuard::Lock FileCache::lockCache() const return cache_guard.lock(); } -FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment::Range & range) const +FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment::Range & range, size_t file_segments_limit) const { /// Given range = [left, right] and non-overlapping ordered set of file segments, /// find list [segment1, ..., segmentN] of segments which intersect with given range. 
@@ -166,6 +166,9 @@ FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment: FileSegments result; auto add_to_result = [&](const FileSegmentMetadata & file_segment_metadata) { + if (file_segments_limit && result.size() == file_segments_limit) + return false; + FileSegmentPtr file_segment; if (!file_segment_metadata.evicting()) { @@ -181,6 +184,7 @@ FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment: } result.push_back(file_segment); + return true; }; auto segment_it = file_segments.lower_bound(range.left); @@ -197,7 +201,8 @@ FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment: if (file_segment_metadata.file_segment->range().right < range.left) return {}; - add_to_result(file_segment_metadata); + if (!add_to_result(file_segment_metadata)) + return result; } else /// segment_it <-- segmment{k} { @@ -213,7 +218,8 @@ FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment: /// [___________ /// ^ /// range.left - add_to_result(prev_file_segment_metadata); + if (!add_to_result(prev_file_segment_metadata)) + return result; } } @@ -229,7 +235,8 @@ FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment: if (range.right < file_segment_metadata.file_segment->range().left) break; - add_to_result(file_segment_metadata); + if (!add_to_result(file_segment_metadata)) + return result; ++segment_it; } } @@ -273,6 +280,7 @@ void FileCache::fillHolesWithEmptyFileSegments( LockedKey & locked_key, FileSegments & file_segments, const FileSegment::Range & range, + size_t file_segments_limit, bool fill_with_detached_file_segments, const CreateFileSegmentSettings & settings) { @@ -338,6 +346,9 @@ void FileCache::fillHolesWithEmptyFileSegments( ++it; } + if (file_segments.size() >= file_segments_limit) + return; + if (current_pos <= range.right) { /// ________] -- requested range @@ -374,7 +385,7 @@ FileSegmentsHolderPtr FileCache::set( auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::CREATE_EMPTY); FileSegment::Range range(offset, offset + size - 1); - auto file_segments = getImpl(*locked_key, range); + auto file_segments = getImpl(*locked_key, range, /* file_segments_limit */0); if (!file_segments.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Having intersection with already existing cache"); @@ -416,19 +427,46 @@ FileCache::getOrSet( auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::CREATE_EMPTY); /// Get all segments which intersect with the given range. - auto file_segments = getImpl(*locked_key, range); + auto file_segments = getImpl(*locked_key, range, file_segments_limit); + + bool limit_reached = false; if (file_segments.empty()) { file_segments = splitRangeIntoFileSegments(*locked_key, range.left, range.size(), FileSegment::State::EMPTY, settings); + + while (!file_segments.empty() && file_segments.front()->range().right < offset) + file_segments.pop_front(); } else { - fillHolesWithEmptyFileSegments( - *locked_key, file_segments, range, /* fill_with_detached */false, settings); - } + limit_reached = file_segments_limit && file_segments.size() >= file_segments_limit; - while (!file_segments.empty() && file_segments.front()->range().right < offset) - file_segments.pop_front(); + /// A while loop for the case if we set a limit to n, but all these n file segments are removed + /// as they turned out redundant because of the alignment of offset to aligned_offset. 
+ while (true) + { + size_t last_offset = file_segments.back()->range().right; + + while (!file_segments.empty() && file_segments.front()->range().right < offset) + file_segments.pop_front(); + + if (!file_segments.empty()) + break; + + if (!limit_reached) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty list of file segments"); + + range.left = std::min(offset, last_offset + 1); + file_segments = getImpl(*locked_key, range, file_segments_limit); + } + + range.left = std::min(offset, file_segments.front()->range().left); + if (limit_reached) + range.right = file_segments.back()->range().right; + + fillHolesWithEmptyFileSegments( + *locked_key, file_segments, range, file_segments_limit, /* fill_with_detached */false, settings); + } while (!file_segments.empty() && file_segments.back()->range().left >= offset + size) file_segments.pop_back(); @@ -439,7 +477,9 @@ FileCache::getOrSet( file_segments.pop_back(); } - chassert(!file_segments.empty()); + if (file_segments.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty list of file segments for offset {}, size {} (file size: {})", offset, size, file_size); + return std::make_unique(std::move(file_segments)); } @@ -455,11 +495,11 @@ FileSegmentsHolderPtr FileCache::get(const Key & key, size_t offset, size_t size FileSegment::Range range(offset, offset + size - 1); /// Get all segments which intersect with the given range. - auto file_segments = getImpl(*locked_key, range); + auto file_segments = getImpl(*locked_key, range, file_segments_limit); if (!file_segments.empty()) { fillHolesWithEmptyFileSegments( - *locked_key, file_segments, range, /* fill_with_detached */true, CreateFileSegmentSettings{}); + *locked_key, file_segments, range, file_segments_limit, /* fill_with_detached */true, CreateFileSegmentSettings{}); if (file_segments_limit) { diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 1a1a25cd9c1..14f27a69a68 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -215,7 +215,7 @@ private: void loadMetadataImpl(); void loadMetadataForKeys(const std::filesystem::path & keys_dir); - FileSegments getImpl(const LockedKey & locked_key, const FileSegment::Range & range) const; + FileSegments getImpl(const LockedKey & locked_key, const FileSegment::Range & range, size_t file_segments_limit) const; FileSegments splitRangeIntoFileSegments( LockedKey & locked_key, @@ -228,6 +228,7 @@ private: LockedKey & locked_key, FileSegments & file_segments, const FileSegment::Range & range, + size_t file_segments_limit, bool fill_with_detached_file_segments, const CreateFileSegmentSettings & settings); diff --git a/src/Interpreters/Cache/FileCache_fwd.h b/src/Interpreters/Cache/FileCache_fwd.h index 1f61617668e..3e7150ad253 100644 --- a/src/Interpreters/Cache/FileCache_fwd.h +++ b/src/Interpreters/Cache/FileCache_fwd.h @@ -4,7 +4,7 @@ namespace DB { -static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 16 * 1024 * 1024; /// 16Mi +static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 32 * 1024 * 1024; /// 32Mi static constexpr int FILECACHE_DEFAULT_FILE_SEGMENT_ALIGNMENT = 4 * 1024 * 1024; /// 4Mi static constexpr int FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_THREADS = 2; static constexpr int FILECACHE_DEFAULT_LOAD_METADATA_THREADS = 1; From d7c5caef927d4e0ca2091f483e824a1fbddc3909 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 7 Oct 2023 15:23:32 +0200 Subject: [PATCH 0083/1097] Better --- src/Interpreters/Cache/FileCache.cpp | 3 ++- 1 file 
changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 576aab31adc..39cef20829e 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -456,7 +456,8 @@ FileCache::getOrSet( if (!limit_reached) throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty list of file segments"); - range.left = std::min(offset, last_offset + 1); + range.left = last_offset + 1; + chassert(offset >= range.left); file_segments = getImpl(*locked_key, range, file_segments_limit); } From 253b8efa58fb6cd56345d4785494d39ff38f0242 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 11 Oct 2023 16:57:40 +0200 Subject: [PATCH 0084/1097] do not modify column type when there is statistic --- .../mergetree-family/mergetree.md | 19 +++++++++++++++---- docs/en/operations/settings/settings.md | 8 ++++++++ src/Storages/MergeTree/MergeTreeData.cpp | 11 +++++++++++ .../0_stateless/02864_statistic_exception.sql | 5 +++++ 4 files changed, 39 insertions(+), 4 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 16a9e977c6b..74b6fe40600 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -1358,16 +1358,27 @@ In this sample configuration: ## Column Statistics (Experimental) {#column-statistics} -The statistic declaration is in the columns section of the `CREATE` query. +The statistic declaration is in the columns section of the `CREATE` query for tables from the `*MergeTree*` family when we enable `set allow_experimental_statistic = 1`. ``` sql -STATISTIC(type) +CREATE TABLE example_table +( + a Int64 STATISTIC(tdigest), + b Float64 +) +ENGINE = MergeTree +ORDER BY a ``` -For tables from the `*MergeTree` family, statistics can be specified. +We can also manipulate statistics with `ALTER` statements. + +```sql +ALTER TABLE example_table ADD STATISTIC b TYPE tdigest; +ALTER TABLE example_table DROP STATISTIC a TYPE tdigest; +``` These lightweight statistics aggregate information about distribution of values in columns. -They can be used for query optimization (At current time they are used for moving expressions to PREWHERE). +They can be used for query optimization when we enable `set allow_statistic_optimize = 1`. #### Available Types of Column Statistics {#available-types-of-column-statistics} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index f703429cb70..4644375235c 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4726,3 +4726,11 @@ a Tuple( l Nullable(String) ) ``` + +## allow_experimental_statistic {#allow_experimental_statistic} + +Allows defining columns with [statistics](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) and [manipulating statistics](../../engines/table-engines/mergetree-family/mergetree.md#column-statistics). + +## allow_statistic_optimize {#allow_statistic_optimize} + +Allows using statistics to optimize the order of [prewhere conditions](../../sql-reference/statements/select/prewhere.md).
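For illustration, a minimal usage sketch (assuming the `example_table` from the documentation snippet above has been created and populated; the filter value is arbitrary):

```sql
-- minimal sketch: enable the experimental settings, then filter on the statistic-backed column
SET allow_experimental_statistic = 1;  -- required to declare or alter statistics
SET allow_statistic_optimize = 1;      -- lets the optimizer use the tdigest statistic
-- with a tdigest statistic on column a, this condition becomes a candidate for PREWHERE
SELECT count() FROM example_table WHERE a < 100;
```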
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index a610ac15f9b..2a5c1da02ab 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3312,6 +3312,17 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context { columns_to_check_conversion.push_back( new_metadata.getColumns().getPhysical(command.column_name)); + + const auto & old_column = old_metadata.getColumns().get(command.column_name); + if (old_column.stat) + { + const auto & new_column = new_metadata.getColumns().get(command.column_name); + if (!old_column.type->equals(*new_column.type)) + throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, + "ALTER types of column {} with statistic is not safe " + "because it can change the representation of statistic", + backQuoteIfNeed(command.column_name)); + } } } } diff --git a/tests/queries/0_stateless/02864_statistic_exception.sql b/tests/queries/0_stateless/02864_statistic_exception.sql index 95b94a9bca3..c37f6b1ce06 100644 --- a/tests/queries/0_stateless/02864_statistic_exception.sql +++ b/tests/queries/0_stateless/02864_statistic_exception.sql @@ -45,4 +45,9 @@ ALTER TABLE t1 DROP STATISTIC a TYPE tdigest; -- { serverError ILLEGAL_STATISTIC ALTER TABLE t1 CLEAR STATISTIC a TYPE tdigest; -- { serverError ILLEGAL_STATISTIC } ALTER TABLE t1 MATERIALIZE STATISTIC b TYPE tdigest; -- { serverError ILLEGAL_STATISTIC } +ALTER TABLE t1 ADD STATISTIC a TYPE tdigest; +ALTER TABLE t1 ADD STATISTIC b TYPE tdigest; +ALTER TABLE t1 MODIFY COLUMN a Float64 TTL now() + INTERVAL 1 MONTH; +ALTER TABLE t1 MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } + DROP TABLE t1; From f31a6f2c0db3e1012394c1e7454c6ccbb2f5cf21 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 12 Oct 2023 14:49:16 +0000 Subject: [PATCH 0085/1097] Support only Full Sort --- src/Processors/QueryPlan/SortingStep.cpp | 8 +- src/Processors/QueryPlan/SortingStep.h | 17 ++- .../02884_parallel_window_functions.reference | 100 +++++++++++++++ .../02884_parallel_window_functions.sql | 119 ++++++++++++++++++ 4 files changed, 234 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/02884_parallel_window_functions.reference create mode 100644 tests/queries/0_stateless/02884_parallel_window_functions.sql diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index ec32f6f6a28..e94e818a4aa 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -193,7 +193,7 @@ void SortingStep::scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline) for (size_t i = 0; i < threads; ++i) { size_t output_it = i; - auto resize = std::make_shared(ports[output_it]->getHeader(), streams, 1); + auto resize = std::make_shared(stream_header, streams, 1); auto & inputs = resize->getInputs(); for (auto input_it = inputs.begin(); input_it != inputs.end(); output_it += threads, ++input_it) @@ -367,11 +367,7 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build if (type == Type::FinishSorting) { bool need_finish_sorting = (prefix_description.size() < result_description.size()); - - if (partition_by_description.empty()) - mergingSorted(pipeline, prefix_description, (need_finish_sorting ? 0 : limit)); - - scatterByPartitionIfNeeded(pipeline); + mergingSorted(pipeline, prefix_description, (need_finish_sorting ? 
0 : limit)); if (need_finish_sorting) { diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h index ede91852aff..bdfc96f7931 100644 --- a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -97,12 +97,21 @@ private: void scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline); void updateOutputStream() override; - static void - mergeSorting(QueryPipelineBuilder & pipeline, const Settings & sort_settings, const SortDescription & result_sort_desc, UInt64 limit_); + static void mergeSorting( + QueryPipelineBuilder & pipeline, + const Settings & sort_settings, + const SortDescription & result_sort_desc, + UInt64 limit_); - void mergingSorted(QueryPipelineBuilder & pipeline, const SortDescription & result_sort_desc, UInt64 limit_); + void mergingSorted( + QueryPipelineBuilder & pipeline, + const SortDescription & result_sort_desc, + UInt64 limit_); void finishSorting( - QueryPipelineBuilder & pipeline, const SortDescription & input_sort_desc, const SortDescription & result_sort_desc, UInt64 limit_); + QueryPipelineBuilder & pipeline, + const SortDescription & input_sort_desc, + const SortDescription & result_sort_desc, + UInt64 limit_); void fullSort( QueryPipelineBuilder & pipeline, const SortDescription & result_sort_desc, diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.reference b/tests/queries/0_stateless/02884_parallel_window_functions.reference new file mode 100644 index 00000000000..cab6195b625 --- /dev/null +++ b/tests/queries/0_stateless/02884_parallel_window_functions.reference @@ -0,0 +1,100 @@ +1 +-- { echoOn } + +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + GROUP BY ac, nw +) +GROUP BY nw +ORDER BY R DESC +LIMIT 10; +0 2 0 +1 2 0 +2 2 0 +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + GROUP BY ac, nw +) +GROUP BY nw +ORDER BY R DESC +LIMIT 10 +SETTINGS max_threads = 1; +0 2 0 +1 2 0 +2 2 0 +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 0 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 1 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 2 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 3 + GROUP BY + ac, + nw +) +GROUP BY nw +ORDER BY R DESC +LIMIT 10; +0 2 0 +1 2 0 +2 2 0 diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.sql b/tests/queries/0_stateless/02884_parallel_window_functions.sql new file mode 100644 index 00000000000..5e71fadb3ff --- /dev/null +++ b/tests/queries/0_stateless/02884_parallel_window_functions.sql @@ -0,0 +1,119 @@ +CREATE TABLE window_funtion_threading +Engine = MergeTree +ORDER BY (ac, nw) +AS SELECT + toUInt64(toFloat32(number % 2) % 20000000) as ac, + toFloat32(1) as wg, + toUInt16(toFloat32(number % 3) % 400) as nw +FROM 
numbers_mt(10000000); + +SELECT count() FROM (EXPLAIN PIPELINE SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + GROUP BY ac, nw +) +GROUP BY nw +ORDER BY R DESC +LIMIT 10) where explain ilike '%ScatterByPartitionTransform%'; + +-- { echoOn } + +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + GROUP BY ac, nw +) +GROUP BY nw +ORDER BY R DESC +LIMIT 10; + +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + GROUP BY ac, nw +) +GROUP BY nw +ORDER BY R DESC +LIMIT 10 +SETTINGS max_threads = 1; + +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 0 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 1 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 2 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 3 + GROUP BY + ac, + nw +) +GROUP BY nw +ORDER BY R DESC +LIMIT 10; From 09072097ec33231cd0df95dc91cfd1317f662da2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 16 Oct 2023 14:32:47 +0200 Subject: [PATCH 0086/1097] Better --- src/Interpreters/Cache/FileCache.cpp | 82 ++++++++++---------- src/Interpreters/Cache/FileCacheSettings.cpp | 3 + src/Interpreters/Cache/FileSegment.h | 2 + 3 files changed, 47 insertions(+), 40 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index bbb5fd71b8f..6d507413bab 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -237,6 +237,7 @@ FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment: if (!add_to_result(file_segment_metadata)) return result; + ++segment_it; } } @@ -418,66 +419,67 @@ FileCache::getOrSet( assertInitialized(); + const auto end_offset = offset + size - 1; const auto aligned_offset = roundDownToMultiple(offset, boundary_alignment); - const auto aligned_end = std::min(roundUpToMultiple(offset + size, boundary_alignment), file_size); - const auto aligned_size = aligned_end - aligned_offset; - - FileSegment::Range range(aligned_offset, aligned_offset + aligned_size - 1); + const auto aligned_end_offset = std::min(roundUpToMultiple(offset + size, boundary_alignment), file_size) - 1; + chassert(aligned_offset <= offset); auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::CREATE_EMPTY); /// Get all segments which intersect with the given range. + FileSegment::Range range(offset, end_offset); auto file_segments = getImpl(*locked_key, range, file_segments_limit); - bool limit_reached = false; + if (aligned_offset < offset && (file_segments.empty() || offset < file_segments.front()->range().left)) + { + auto prefix_range = FileSegment::Range(aligned_offset, file_segments.empty() ? 
offset - 1 : file_segments.front()->range().left - 1); + auto prefix_file_segments = getImpl(*locked_key, prefix_range, /* file_segments_limit */0); + + while (!prefix_file_segments.empty() && prefix_file_segments.front()->range().right < offset) + prefix_file_segments.pop_front(); + + if (!prefix_file_segments.empty()) + { + file_segments.splice(file_segments.begin(), prefix_file_segments); + range.left = file_segments.front()->range().left; + } + } + + if (end_offset < aligned_end_offset && (file_segments.empty() || file_segments.back()->range().right < end_offset)) + { + auto suffix_range = FileSegment::Range(end_offset, aligned_end_offset); + /// Get only 1 file segment. + auto suffix_file_segments = getImpl(*locked_key, suffix_range, /* file_segments_limit */1); + + if (!suffix_file_segments.empty()) + range.right = suffix_file_segments.front()->range().left - 1; + } + if (file_segments.empty()) { file_segments = splitRangeIntoFileSegments(*locked_key, range.left, range.size(), FileSegment::State::EMPTY, settings); - - while (!file_segments.empty() && file_segments.front()->range().right < offset) - file_segments.pop_front(); } else { - limit_reached = file_segments_limit && file_segments.size() >= file_segments_limit; - - /// A while loop for the case if we set a limit to n, but all these n file segments are removed - /// as they turned out redundant because of the alignment of offset to aligned_offset. - while (true) - { - size_t last_offset = file_segments.back()->range().right; - - while (!file_segments.empty() && file_segments.front()->range().right < offset) - file_segments.pop_front(); - - if (!file_segments.empty()) - break; - - if (!limit_reached) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty list of file segments"); - - range.left = last_offset + 1; - chassert(offset >= range.left); - file_segments = getImpl(*locked_key, range, file_segments_limit); - } - - range.left = std::min(offset, file_segments.front()->range().left); - if (limit_reached) - range.right = file_segments.back()->range().right; + chassert(file_segments.front()->range().right >= offset); + chassert(file_segments.back()->range().left <= end_offset); fillHolesWithEmptyFileSegments( *locked_key, file_segments, range, file_segments_limit, /* fill_with_detached */false, settings); + + if (!file_segments.front()->range().contains(offset)) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected {} to include {} " + "(end offset: {}, aligned offset: {}, aligned end offset: {})", + file_segments.front()->range().toString(), offset, end_offset, aligned_offset, aligned_end_offset); + } + + chassert(file_segments_limit ? 
file_segments.back()->range().left <= end_offset : file_segments.back()->range().contains(end_offset)); } - while (!file_segments.empty() && file_segments.back()->range().left >= offset + size) + while (file_segments_limit && file_segments.size() > file_segments_limit) file_segments.pop_back(); - if (file_segments_limit) - { - while (file_segments.size() > file_segments_limit) - file_segments.pop_back(); - } - if (file_segments.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty list of file segments for offset {}, size {} (file size: {})", offset, size, file_size); diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp index 6f2f8c4b778..de21555c050 100644 --- a/src/Interpreters/Cache/FileCacheSettings.cpp +++ b/src/Interpreters/Cache/FileCacheSettings.cpp @@ -47,6 +47,9 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & if (config.has(config_prefix + ".boundary_alignment")) boundary_alignment = parseWithSizeSuffix(config.getString(config_prefix + ".boundary_alignment")); + if (boundary_alignment > max_file_segment_size) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `boundary_alignment` cannot exceed `max_file_segment_size`"); + if (config.has(config_prefix + ".background_download_threads")) background_download_threads = config.getUInt(config_prefix + ".background_download_threads"); diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 03d534f906d..04c82d71b5e 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -136,6 +136,8 @@ public: size_t size() const { return right - left + 1; } String toString() const { return fmt::format("[{}, {}]", std::to_string(left), std::to_string(right)); } + + bool contains(size_t offset) const { return left <= offset && offset <= right; } }; static String getCallerId(); From ea95a49e864f3e49de467b53b29dc72a719e4f33 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Mon, 16 Oct 2023 21:22:43 +0000 Subject: [PATCH 0087/1097] merge_row_policy: some changes per code review --- src/Storages/StorageMerge.cpp | 76 ++++++++++++++--------------------- src/Storages/StorageMerge.h | 3 +- 2 files changed, 32 insertions(+), 47 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 354e17ad5ba..33b850f0aa0 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -382,13 +382,7 @@ void StorageMerge::read( class ReadFromMerge::RowPolicyData { public: - /// Row policy requires extra filtering - bool hasRowPolicy() - { - return static_cast(row_policy_filter_ptr); - } - - void init(RowPolicyFilterPtr, std::shared_ptr, ContextPtr); + RowPolicyData(RowPolicyFilterPtr, std::shared_ptr, ContextPtr); /// Add columns that needed for row policies to data stream /// SELECT x from T if T has row policy y=42 @@ -687,7 +681,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( storage_snapshot, modified_query_info); - RowPolicyData row_policy_data; + std::optional row_policy_data; if (processed_stage <= storage_stage || (allow_experimental_analyzer && processed_stage == QueryProcessingStage::FetchColumns)) { @@ -700,19 +694,17 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( StorageView * view = dynamic_cast(storage.get()); if (!view || allow_experimental_analyzer) { - row_policy_data.init( - modified_context->getRowPolicyFilter( + auto row_policy_filter_ptr = modified_context->getRowPolicyFilter( database_name, table_name, - 
RowPolicyFilterType::SELECT_FILTER), - storage, - modified_context); - - if (row_policy_data.hasRowPolicy()) + RowPolicyFilterType::SELECT_FILTER); + if (row_policy_filter_ptr) { - row_policy_data.extendNames(real_column_names); + row_policy_data.emplace(row_policy_filter_ptr, storage, modified_context); + row_policy_data->extendNames(real_column_names); } + storage->read(plan, real_column_names, storage_snapshot, @@ -725,11 +717,11 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( if (!plan.isInitialized()) return {}; - if (row_policy_data.hasRowPolicy()) + if (row_policy_data) { if (auto * source_step_with_filter = dynamic_cast((plan.getRootNode()->step.get()))) { - row_policy_data.addStorageFilter(source_step_with_filter); + row_policy_data->addStorageFilter(source_step_with_filter); } } } @@ -847,6 +839,11 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( }); } + if (row_policy_data) + { + row_policy_data->addFilterTransform(*builder); + } + /// Subordinary tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. /// Besides this we add FilterTransform if it is needed to follow row level policies. @@ -856,42 +853,38 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( aliases, modified_context, *builder, - processed_stage, - row_policy_data); + processed_stage); } return builder; } -void ReadFromMerge::RowPolicyData::init(RowPolicyFilterPtr row_policy_filter_ptr_, +ReadFromMerge::RowPolicyData::RowPolicyData(RowPolicyFilterPtr row_policy_filter_ptr_, std::shared_ptr storage, ContextPtr local_context) + : row_policy_filter_ptr(row_policy_filter_ptr_) { - if (row_policy_filter_ptr_) - { - row_policy_filter_ptr = row_policy_filter_ptr_; + assert(row_policy_filter_ptr_); - ASTPtr expr = row_policy_filter_ptr->expression; + ASTPtr expr = row_policy_filter_ptr->expression; - auto storage_metadata_snapshot = storage->getInMemoryMetadataPtr(); - auto storage_columns = storage_metadata_snapshot->getColumns(); - auto needed_columns = storage_columns.getAllPhysical(); + auto storage_metadata_snapshot = storage->getInMemoryMetadataPtr(); + auto storage_columns = storage_metadata_snapshot->getColumns(); + auto needed_columns = storage_columns.getAllPhysical(); - auto syntax_result = TreeRewriter(local_context).analyze(expr, needed_columns); - auto expression_analyzer = ExpressionAnalyzer{expr, syntax_result, local_context}; + auto syntax_result = TreeRewriter(local_context).analyze(expr, needed_columns); + auto expression_analyzer = ExpressionAnalyzer{expr, syntax_result, local_context}; - actions_dag = expression_analyzer.getActionsDAG(false /* add_aliases */, false /* project_result */); - filter_actions = std::make_shared(actions_dag, - ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - filter_column_name = namesDifference(filter_actions->getSampleBlock().getNames(), filter_actions->getRequiredColumns()); - } + actions_dag = expression_analyzer.getActionsDAG(false /* add_aliases */, false /* project_result */); + filter_actions = std::make_shared(actions_dag, + ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); + filter_column_name = namesDifference(filter_actions->getSampleBlock().getNames(), filter_actions->getRequiredColumns()); } // Add columns that needed to evaluate row policies // SELECT x from t if t has row policy void ReadFromMerge::RowPolicyData::extendNames(Names & names) { - 
assert(row_policy_filter_ptr); ASTPtr expr = row_policy_filter_ptr->expression; RequiredSourceColumnsVisitor::Data columns_context; @@ -899,12 +892,11 @@ void ReadFromMerge::RowPolicyData::extendNames(Names & names) const auto req_columns = columns_context.requiredColumns(); - std::sort(names.begin(), names.end()); NameSet added_names; for (const auto & req_column : req_columns) { - if (!std::binary_search(names.begin(), names.end(), req_column)) + if (std::find(names.begin(), names.end(), req_column) == names.end()) { added_names.insert(req_column); } @@ -1138,14 +1130,8 @@ void ReadFromMerge::convertingSourceStream( const Aliases & aliases, ContextPtr local_context, QueryPipelineBuilder & builder, - QueryProcessingStage::Enum processed_stage, - RowPolicyData & row_policy_data) + QueryProcessingStage::Enum processed_stage) { - if (row_policy_data.hasRowPolicy()) - { - row_policy_data.addFilterTransform(builder); - } - Block before_block_header = builder.getHeader(); auto storage_sample_block = metadata_snapshot->getSampleBlock(); diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 1a5aca24e4c..04c5a70651f 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -203,8 +203,7 @@ private: const Aliases & aliases, ContextPtr context, QueryPipelineBuilder & builder, - QueryProcessingStage::Enum processed_stage, - RowPolicyData & row_policy_data); + QueryProcessingStage::Enum processed_stage); }; } From 7f991bf6e7372c1520a278fad9f7bc8d7dd83bc3 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Mon, 16 Oct 2023 21:49:16 +0000 Subject: [PATCH 0088/1097] merge_row_policy: a comment --- src/Storages/StorageMerge.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 33b850f0aa0..95760bbeb42 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -881,8 +881,8 @@ ReadFromMerge::RowPolicyData::RowPolicyData(RowPolicyFilterPtr row_policy_filter filter_column_name = namesDifference(filter_actions->getSampleBlock().getNames(), filter_actions->getRequiredColumns()); } -// Add columns that needed to evaluate row policies -// SELECT x from t if t has row policy +// Add columns that needed _only_ to evaluate row policies +// SELECT x from t if t has row policy that is based on y void ReadFromMerge::RowPolicyData::extendNames(Names & names) { ASTPtr expr = row_policy_filter_ptr->expression; From 786183ee2447936fe9277df4b2554ed8a9d9cc47 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Mon, 16 Oct 2023 22:16:53 +0000 Subject: [PATCH 0089/1097] merge_row_policy: get rid of RequiredSourceColumnsVisitor --- src/Storages/StorageMerge.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 95760bbeb42..61f2132cfeb 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -885,16 +885,9 @@ ReadFromMerge::RowPolicyData::RowPolicyData(RowPolicyFilterPtr row_policy_filter // SELECT x from t if t has row policy that is based on y void ReadFromMerge::RowPolicyData::extendNames(Names & names) { - ASTPtr expr = row_policy_filter_ptr->expression; - - RequiredSourceColumnsVisitor::Data columns_context; - RequiredSourceColumnsVisitor(columns_context).visit(expr); - - const auto req_columns = columns_context.requiredColumns(); - NameSet added_names; - for (const auto & req_column : req_columns) + for (const auto & req_column : 
filter_actions->getRequiredColumns()) { if (std::find(names.begin(), names.end(), req_column) == names.end()) { From 7515853ad4b3e910f20df99038d706ef77ab2819 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 17 Oct 2023 11:43:51 +0200 Subject: [PATCH 0090/1097] Fix build --- src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp | 2 +- src/Interpreters/Cache/FileCache.cpp | 4 ++-- src/Interpreters/Cache/FileCache.h | 9 +++++++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 3c16d3d9ae2..27d0b6706a6 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -127,7 +127,7 @@ bool CachedOnDiskReadBufferFromFile::nextFileSegmentsBatch() else { CreateFileSegmentSettings create_settings(FileSegmentKind::Regular); - file_segments = cache->getOrSet(cache_key, file_offset_of_buffer_end, size, file_size.value(), settings.filesystem_cache_getorset_batch_size, create_settings); + file_segments = cache->getOrSet(cache_key, file_offset_of_buffer_end, size, file_size.value(), create_settings, settings.filesystem_cache_getorset_batch_size); } return !file_segments->empty(); } diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 6d507413bab..82a724523e7 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -412,8 +412,8 @@ FileCache::getOrSet( size_t offset, size_t size, size_t file_size, - size_t file_segments_limit, - const CreateFileSegmentSettings & settings) + const CreateFileSegmentSettings & settings, + size_t file_segments_limit) { ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheGetOrSetMicroseconds); diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 2bf7b9281d5..d85f50cf34c 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -84,8 +84,13 @@ public: * As long as pointers to returned file segments are held * it is guaranteed that these file segments are not removed from cache. */ - FileSegmentsHolderPtr - getOrSet(const Key & key, size_t offset, size_t size, size_t file_size, size_t file_segments_limit, const CreateFileSegmentSettings & settings); + FileSegmentsHolderPtr getOrSet( + const Key & key, + size_t offset, + size_t size, + size_t file_size, + const CreateFileSegmentSettings & settings, + size_t file_segments_limit = 0); /** * Segments in returned list are ordered in ascending order and represent a full contiguous From d837aa675f5ec56434aa7f58332fc4b922b1b9ba Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 17 Oct 2023 13:14:28 +0200 Subject: [PATCH 0091/1097] Fix --- src/Interpreters/Cache/FileCache.cpp | 29 +++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 82a724523e7..ba4998bfa00 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -435,13 +435,26 @@ FileCache::getOrSet( auto prefix_range = FileSegment::Range(aligned_offset, file_segments.empty() ? 
offset - 1 : file_segments.front()->range().left - 1); auto prefix_file_segments = getImpl(*locked_key, prefix_range, /* file_segments_limit */0); - while (!prefix_file_segments.empty() && prefix_file_segments.front()->range().right < offset) - prefix_file_segments.pop_front(); - - if (!prefix_file_segments.empty()) + if (prefix_file_segments.empty()) { - file_segments.splice(file_segments.begin(), prefix_file_segments); - range.left = file_segments.front()->range().left; + range.left = aligned_offset; + } + else + { + size_t last_right_offset = prefix_file_segments.back()->range().right; + + while (!prefix_file_segments.empty() && prefix_file_segments.front()->range().right < offset) + prefix_file_segments.pop_front(); + + if (prefix_file_segments.empty()) + { + range.left = last_right_offset + 1; + } + else + { + file_segments.splice(file_segments.begin(), prefix_file_segments); + range.left = file_segments.front()->range().left; + } } } @@ -451,7 +464,9 @@ FileCache::getOrSet( /// Get only 1 file segment. auto suffix_file_segments = getImpl(*locked_key, suffix_range, /* file_segments_limit */1); - if (!suffix_file_segments.empty()) + if (suffix_file_segments.empty()) + range.right = aligned_end_offset; + else range.right = suffix_file_segments.front()->range().left - 1; } From d2ac16749a01c7246e4e9533846c5384c4146edc Mon Sep 17 00:00:00 2001 From: Han Fei Date: Tue, 17 Oct 2023 16:25:01 +0200 Subject: [PATCH 0092/1097] refinement --- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 18a635f26e2..1d00ba0ecb6 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -647,7 +647,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.stat_type) { - if (!context_->getSettingsRef().allow_experimental_statistic) + if (!attach && !context_->getSettingsRef().allow_experimental_statistic) throw Exception(ErrorCodes::INCORRECT_QUERY, "Create table with statistic is now disabled. 
Turn on allow_experimental_statistic"); column.stat = StatisticDescription::getStatisticFromColumnDeclaration(col_decl); } From 5d8b1cea910b3beb531e1e6122d2596f7197eae8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 17 Oct 2023 19:19:19 +0200 Subject: [PATCH 0093/1097] Fix --- src/Interpreters/Cache/FileCache.cpp | 2 +- .../0_stateless/02503_cache_on_write_with_small_segment_size.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index ba4998bfa00..3349dcc8d3f 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -347,7 +347,7 @@ void FileCache::fillHolesWithEmptyFileSegments( ++it; } - if (file_segments.size() >= file_segments_limit) + if (file_segments_limit && file_segments.size() >= file_segments_limit) return; if (current_pos <= range.right) diff --git a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh index 63f912c6bff..4f3fd0e54f6 100755 --- a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh +++ b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh @@ -22,6 +22,7 @@ SETTINGS min_bytes_for_wide_part = 0, type = cache, max_size = '128Mi', max_file_segment_size = '10Ki', + boundary_alignment = '5Ki', path = '${CLICKHOUSE_TEST_UNIQUE_NAME}', cache_on_write_operations = 1, enable_filesystem_query_cache_limit = 1, From 5ca43e51c1dfce7d13b0f192327b8c16e524243c Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 17 Oct 2023 20:45:00 +0000 Subject: [PATCH 0094/1097] Remove unused code --- src/Processors/QueryPlan/WindowStep.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp index d104cfe2975..12f78ac014d 100644 --- a/src/Processors/QueryPlan/WindowStep.cpp +++ b/src/Processors/QueryPlan/WindowStep.cpp @@ -7,11 +7,6 @@ #include #include -#include "Columns/ColumnConst.h" -#include "DataTypes/DataTypesNumber.h" -#include "Functions/FunctionFactory.h" -#include "Processors/Transforms/ScatterByPartitionTransform.h" - namespace DB { From 22bab4bcc3b13741f19e0b8dd4afa23b4f1cca44 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 18 Oct 2023 11:26:44 +0200 Subject: [PATCH 0095/1097] Fix configs --- tests/integration/test_filesystem_cache/test.py | 2 +- .../configs/config.d/storage_configuration.xml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_filesystem_cache/test.py b/tests/integration/test_filesystem_cache/test.py index be7b12946a7..3a6a1ef76eb 100644 --- a/tests/integration/test_filesystem_cache/test.py +++ b/tests/integration/test_filesystem_cache/test.py @@ -46,7 +46,7 @@ def test_parallel_cache_loading_on_startup(cluster, node_name): path = 'paralel_loading_test', disk = 'hdd_blob', max_file_segment_size = '1Ki', - boundary_alignemt = '1Ki', + boundary_alignment = '1Ki', max_size = '1Gi', max_elements = 10000000, load_metadata_threads = 30); diff --git a/tests/integration/test_temporary_data_in_cache/configs/config.d/storage_configuration.xml b/tests/integration/test_temporary_data_in_cache/configs/config.d/storage_configuration.xml index b527c74e8de..3064003e6c8 100644 --- a/tests/integration/test_temporary_data_in_cache/configs/config.d/storage_configuration.xml +++ b/tests/integration/test_temporary_data_in_cache/configs/config.d/storage_configuration.xml @@ -12,6 +12,7 @@ 
/tiny_local_cache/ 10M 1M + 1M 1 0 From 7aa57516c199f45548aea308c4ce2ee1d814e73e Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 18 Oct 2023 11:31:10 +0200 Subject: [PATCH 0096/1097] Update tests config --- tests/config/users.d/s3_cache.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/config/users.d/s3_cache.xml b/tests/config/users.d/s3_cache.xml index 69b24ecbbc4..4740f37a90c 100644 --- a/tests/config/users.d/s3_cache.xml +++ b/tests/config/users.d/s3_cache.xml @@ -3,6 +3,7 @@ 1 1 + 10 From c792d952716f333b198bf014d99fc1dceb6a062b Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 18 Oct 2023 14:41:06 +0200 Subject: [PATCH 0097/1097] Update config --- tests/config/config.d/s3_storage_policy_by_default.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/config/config.d/s3_storage_policy_by_default.xml b/tests/config/config.d/s3_storage_policy_by_default.xml index dd93a317a77..e161c2ee01a 100644 --- a/tests/config/config.d/s3_storage_policy_by_default.xml +++ b/tests/config/config.d/s3_storage_policy_by_default.xml @@ -12,6 +12,7 @@ 1Gi cached_s3/ s3 + 10 From 89272e0925c91ed659b51741c58ddc364e149792 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 19 Oct 2023 11:23:56 +0200 Subject: [PATCH 0098/1097] Fix upgrade check, randomize more settings --- docker/test/upgrade/run.sh | 2 ++ tests/clickhouse-test | 3 +++ tests/config/config.d/s3_storage_policy_by_default.xml | 1 - tests/config/install.sh | 1 + tests/config/users.d/s3_cache_new.xml | 7 +++++++ 5 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 tests/config/users.d/s3_cache_new.xml diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index c69d90b9af0..3580f8e5021 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -78,6 +78,7 @@ remove_keeper_config "create_if_not_exists" "[01]" rm /etc/clickhouse-server/config.d/merge_tree.xml rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml +rm /etc/clickhouse-server/users.d/s3_cache_new.xml start stop @@ -114,6 +115,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau rm /etc/clickhouse-server/config.d/merge_tree.xml rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml +rm /etc/clickhouse-server/users.d/s3_cache_new.xml start diff --git a/tests/clickhouse-test b/tests/clickhouse-test index cab7d7e79ff..c0c2d482703 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -577,6 +577,9 @@ class SettingsRandomizer: ), "remote_filesystem_read_method": lambda: random.choice(["read", "threadpool"]), "local_filesystem_read_prefetch": lambda: random.randint(0, 1), + "filesystem_cache_getorset_batch_size": lambda: random.choice([0, 3, 10, 50]), + "read_from_filesystem_cache_if_exists_otherwise_bypass_cache": lambda: random.randint(0, 1), + "throw_on_error_from_cache_on_write_operations": lambda: random.randint(0, 1), "remote_filesystem_read_prefetch": lambda: random.randint(0, 1), "allow_prefetched_read_pool_for_remote_filesystem": lambda: random.randint( 0, 1 diff --git a/tests/config/config.d/s3_storage_policy_by_default.xml b/tests/config/config.d/s3_storage_policy_by_default.xml index e161c2ee01a..dd93a317a77 100644 --- a/tests/config/config.d/s3_storage_policy_by_default.xml +++ b/tests/config/config.d/s3_storage_policy_by_default.xml @@ -12,7 +12,6 @@ 1Gi cached_s3/ s3 - 10 diff --git a/tests/config/install.sh 
b/tests/config/install.sh index 9e3b235515d..d76949fadc7 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -151,6 +151,7 @@ if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then ln -sf $SRC_PATH/config.d/storage_conf.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/users.d/s3_cache.xml $DEST_SERVER_PATH/users.d/ + ln -sf $SRC_PATH/users.d/s3_cache_new.xml $DEST_SERVER_PATH/users.d/ fi if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then diff --git a/tests/config/users.d/s3_cache_new.xml b/tests/config/users.d/s3_cache_new.xml new file mode 100644 index 00000000000..638b7267960 --- /dev/null +++ b/tests/config/users.d/s3_cache_new.xml @@ -0,0 +1,7 @@ + + + + 10 + + + From 7a096904ed001be79b88d2ef50d0c7c460a933bc Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 19 Oct 2023 15:40:32 +0200 Subject: [PATCH 0099/1097] clickhouse-test: export product function for jinja2 Signed-off-by: Azat Khuzhin --- tests/clickhouse-test | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index cab7d7e79ff..c09974b780d 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -8,6 +8,7 @@ import enum from queue import Full import shutil +import itertools import sys import os import os.path @@ -1605,6 +1606,8 @@ class TestSuite: if USE_JINJA else None ) + if j2env is not None: + j2env.globals.update(product=itertools.product) for test_name in os.listdir(self.suite_path): if not is_test_from_dir(self.suite_path, test_name): From 6da75a47c5eb0c68c6b39b22a08ca9923181f808 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 19 Oct 2023 14:32:50 +0200 Subject: [PATCH 0100/1097] Fix "Cannot find column X in source stream" for Distributed queries with LIMIT BY Signed-off-by: Azat Khuzhin --- src/Interpreters/InterpreterSelectQuery.cpp | 15 +++++- .../02899_distributed_limit_by.reference | 52 +++++++++++++++++++ .../02899_distributed_limit_by.sql.j2 | 26 ++++++++++ .../02900_limit_by_query_stage.reference | 3 ++ .../0_stateless/02900_limit_by_query_stage.sh | 9 ++++ 5 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02899_distributed_limit_by.reference create mode 100644 tests/queries/0_stateless/02899_distributed_limit_by.sql.j2 create mode 100644 tests/queries/0_stateless/02900_limit_by_query_stage.reference create mode 100755 tests/queries/0_stateless/02900_limit_by_query_stage.sh diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 0300cc4c9b4..4c51e1d6396 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -993,6 +993,9 @@ Block InterpreterSelectQuery::getSampleBlockImpl() if (analysis_result.before_window) return analysis_result.before_window->getResultColumns(); + // NOTE: should not handle before_limit_by specially since + // WithMergeableState does not process LIMIT BY + return analysis_result.before_order_by->getResultColumns(); } @@ -1036,6 +1039,12 @@ Block InterpreterSelectQuery::getSampleBlockImpl() if (analysis_result.before_window) return analysis_result.before_window->getResultColumns(); + // In case of query on remote shards executed up to + // WithMergeableStateAfterAggregation*, they can process LIMIT BY, + // since the initiator will not apply LIMIT BY again. 
+ if (analysis_result.before_limit_by) + return analysis_result.before_limit_by->getResultColumns(); + return analysis_result.before_order_by->getResultColumns(); } @@ -1482,7 +1491,11 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

= 0 limit 1 by dummy + dummy + 0 as l settings {{ settings }}; +select dummy from (select dummy + dummy + 0 as l, dummy from remote('127.{1,1}', system.one) where dummy + dummy >= 0 limit 1 by l) settings {{ settings }}; +{% endfor %} diff --git a/tests/queries/0_stateless/02900_limit_by_query_stage.reference b/tests/queries/0_stateless/02900_limit_by_query_stage.reference new file mode 100644 index 00000000000..b01fb1ca5b0 --- /dev/null +++ b/tests/queries/0_stateless/02900_limit_by_query_stage.reference @@ -0,0 +1,3 @@ +0 0 +0 0 +0 0 diff --git a/tests/queries/0_stateless/02900_limit_by_query_stage.sh b/tests/queries/0_stateless/02900_limit_by_query_stage.sh new file mode 100755 index 00000000000..d34d0d81bcd --- /dev/null +++ b/tests/queries/0_stateless/02900_limit_by_query_stage.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --stage with_mergeable_state --query 'SELECT dummy FROM system.one WHERE (dummy + dummy) >= 0 LIMIT 1 BY (dummy + dummy) + 0 AS l' +$CLICKHOUSE_CLIENT --stage with_mergeable_state_after_aggregation --query 'SELECT dummy FROM system.one WHERE (dummy + dummy) >= 0 LIMIT 1 BY (dummy + dummy) + 0 AS l' +$CLICKHOUSE_CLIENT --stage with_mergeable_state_after_aggregation_and_limit --query 'SELECT dummy FROM system.one WHERE (dummy + dummy) >= 0 LIMIT 1 BY (dummy + dummy) + 0 AS l' From b902cf51bb8ae71066ea6f46c174608440b04d44 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 19 Oct 2023 18:41:49 +0200 Subject: [PATCH 0101/1097] Update 01952_optimize_distributed_group_by_sharding_key reference Signed-off-by: Azat Khuzhin --- ...ze_distributed_group_by_sharding_key.reference | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference index 9016e731106..6adb2382a6f 100644 --- a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference +++ b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference @@ -19,10 +19,8 @@ explain select distinct on (k1) k2 from remote('127.{1,2}', view(select 1 k1, 2 Expression (Projection) LimitBy Union - Expression (Before LIMIT BY) - LimitBy - Expression ((Before LIMIT BY + (Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))))) - ReadFromStorage (SystemNumbers) + Expression ((Before LIMIT BY + (Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))))) + ReadFromStorage (SystemNumbers) Expression ReadFromRemote (Read from remote replica) explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- optimized @@ -58,11 +56,10 @@ Expression (Projection) Expression (Before LIMIT BY) Sorting (Merge sorted streams for ORDER BY, without aggregation) Union - LimitBy - Expression ((Before LIMIT BY + (Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) [lifted up part])) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + 
(Materialize constants after VIEW subquery + (Projection + Before ORDER BY))))) - ReadFromStorage (SystemNumbers) + Expression ((Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) [lifted up part]) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY))))) + ReadFromStorage (SystemNumbers) ReadFromRemote (Read from remote replica) explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized Expression (Projection) From b13adbbeab1d5d0c91562f187ad5f4d651316311 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 20 Oct 2023 11:48:27 +0200 Subject: [PATCH 0102/1097] Fix style check --- tests/clickhouse-test | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index c0c2d482703..36ac409a4cb 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -577,8 +577,10 @@ class SettingsRandomizer: ), "remote_filesystem_read_method": lambda: random.choice(["read", "threadpool"]), "local_filesystem_read_prefetch": lambda: random.randint(0, 1), - "filesystem_cache_getorset_batch_size": lambda: random.randint(0, 3, 10, 50), - "read_from_filesystem_cache_if_exists_otherwise_bypass_cache": lambda: random.randint(0, 1), + "filesystem_cache_getorset_batch_size": lambda: random.choice([0, 3, 10, 50]), + "read_from_filesystem_cache_if_exists_otherwise_bypass_cache": lambda: random.randint( + 0, 1 + ), "throw_on_error_from_cache_on_write_operations": lambda: random.randint(0, 1), "remote_filesystem_read_prefetch": lambda: random.randint(0, 1), "allow_prefetched_read_pool_for_remote_filesystem": lambda: random.randint( From 8609125f7a244b91a64e75242939e64f824d2ce6 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 20 Oct 2023 12:21:05 +0200 Subject: [PATCH 0103/1097] fuzzer/generate-test-j2: export missing product function Signed-off-by: Azat Khuzhin --- docker/test/fuzzer/generate-test-j2.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/fuzzer/generate-test-j2.py b/docker/test/fuzzer/generate-test-j2.py index 11525163ed8..6fd37d6bd02 100755 --- a/docker/test/fuzzer/generate-test-j2.py +++ b/docker/test/fuzzer/generate-test-j2.py @@ -3,6 +3,7 @@ from argparse import ArgumentParser import os import jinja2 +import itertools def removesuffix(text, suffix): @@ -47,6 +48,7 @@ def main(args): loader=jinja2.FileSystemLoader(suite_dir), keep_trailing_newline=True, ) + j2env.globals.update(product=itertools.product) test_names = os.listdir(suite_dir) for test_name in test_names: From 0d27150948a54a9bf8513622111a4feb52476bbb Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Tue, 24 Oct 2023 13:09:57 +0000 Subject: [PATCH 0104/1097] merge_row_policy: cleanup --- src/Storages/StorageMerge.cpp | 88 +++++++++++++---------------------- 1 file changed, 32 insertions(+), 56 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 61f2132cfeb..7df8b8cc6c1 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -46,7 +45,6 @@ #include #include -#include namespace { @@ -384,23 +382,23 @@ class ReadFromMerge::RowPolicyData public: RowPolicyData(RowPolicyFilterPtr, 
std::shared_ptr, ContextPtr); - /// Add columns that needed for row policies to data stream - /// SELECT x from T if T has row policy y=42 - /// required y in data pipeline + /// Add to data stream columns that are needed only for row policies + /// SELECT x from T if T has row policy y=42 + /// required y in data pipeline void extendNames(Names &); /// Use storage facilities to filter data - /// does not guarantee accuracy, but reduce number of rows + /// optimization + /// does not guarantee accuracy, but reduces number of rows void addStorageFilter(SourceStepWithFilter *); - /// Create explicit filter transform to stop + /// Create explicit filter transform to exclude /// rows that are not conform to row level policy void addFilterTransform(QueryPipelineBuilder &); private: - static std::string namesDifference(Names && outer_set, Names && inner_set); RowPolicyFilterPtr row_policy_filter_ptr; - std::string filter_column_name; // complex filer, may contain logic operations + std::string filter_column_name; // complex filter, may contain logic operations ActionsDAGPtr actions_dag; ExpressionActionsPtr filter_actions; }; @@ -704,7 +702,6 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( row_policy_data->extendNames(real_column_names); } - storage->read(plan, real_column_names, storage_snapshot, @@ -713,17 +710,6 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( processed_stage, max_block_size, UInt32(streams_num)); - - if (!plan.isInitialized()) - return {}; - - if (row_policy_data) - { - if (auto * source_step_with_filter = dynamic_cast((plan.getRootNode()->step.get()))) - { - row_policy_data->addStorageFilter(source_step_with_filter); - } - } } else { @@ -741,9 +727,17 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( view->getInMemoryMetadataPtr(), SelectQueryOptions(processed_stage)); interpreter.buildQueryPlan(plan); + } - if (!plan.isInitialized()) - return {}; + if (!plan.isInitialized()) + return {}; + + if (row_policy_data) + { + if (auto * source_step_with_filter = dynamic_cast((plan.getRootNode()->step.get()))) + { + row_policy_data->addStorageFilter(source_step_with_filter); + } } if (auto * read_from_merge_tree = typeid_cast(plan.getRootNode()->step.get())) @@ -846,14 +840,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( /// Subordinary tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. - /// Besides this we add FilterTransform if it is needed to follow row level policies. 
- - convertingSourceStream(header, - storage_snapshot->metadata, - aliases, - modified_context, - *builder, - processed_stage); + convertingSourceStream(header, storage_snapshot->metadata, aliases, modified_context, *builder, processed_stage); } return builder; @@ -878,18 +865,28 @@ ReadFromMerge::RowPolicyData::RowPolicyData(RowPolicyFilterPtr row_policy_filter actions_dag = expression_analyzer.getActionsDAG(false /* add_aliases */, false /* project_result */); filter_actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - filter_column_name = namesDifference(filter_actions->getSampleBlock().getNames(), filter_actions->getRequiredColumns()); + const auto & required_columns = filter_actions->getRequiredColumnsWithTypes(); + const auto & sample_block_columns = filter_actions->getSampleBlock().getNamesAndTypesList(); + + NamesAndTypesList added, deleted; + sample_block_columns.getDifference(required_columns, added, deleted); + if (!deleted.empty() || added.size() != 1) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot determine row level filter"); + } + + filter_column_name = added.getNames().front(); } -// Add columns that needed _only_ to evaluate row policies -// SELECT x from t if t has row policy that is based on y void ReadFromMerge::RowPolicyData::extendNames(Names & names) { + std::sort(names.begin(), names.end()); NameSet added_names; for (const auto & req_column : filter_actions->getRequiredColumns()) { - if (std::find(names.begin(), names.end(), req_column) == names.end()) + if (!std::binary_search(names.begin(), names.end(), req_column)) { added_names.insert(req_column); } @@ -926,27 +923,6 @@ void ReadFromMerge::RowPolicyData::addFilterTransform(QueryPipelineBuilder & bui }); } -/// Find out an item that in outer_set vector, but not in inner_set vector -std::string ReadFromMerge::RowPolicyData::namesDifference(Names && outer_set, Names && inner_set) -{ - std::sort(outer_set.begin(), outer_set.end()); - std::sort(inner_set.begin(), inner_set.end()); - - Names result; - - std::set_difference(outer_set.begin(), outer_set.end(), - inner_set.begin(), inner_set.end(), std::inserter(result, result.begin())); - - if (result.size() != 1) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Cannot determine row level filter"); - } - - return result.front(); -} - - StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( ContextPtr query_context, const ASTPtr & query /* = nullptr */, From 2c055480d622d0ccc05b65c9c0252b36b66f7eca Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 24 Oct 2023 14:52:47 +0000 Subject: [PATCH 0105/1097] Remove unnecessary flag --- src/Processors/Sources/RemoteSource.cpp | 6 ++---- src/Processors/Sources/RemoteSource.h | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 74ab3649068..6ca5e611713 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -42,7 +42,7 @@ void RemoteSource::setStorageLimits(const std::shared_ptr RemoteSource::tryGenerate() { /// onCancel() will do the cancel if the query was sent. 
- if (was_query_canceled) + if (isCancelled()) return {}; if (!was_query_sent) @@ -169,7 +169,6 @@ std::optional RemoteSource::tryGenerate() void RemoteSource::onCancel() { - was_query_canceled = true; query_executor->cancel(); } @@ -177,7 +176,6 @@ void RemoteSource::onUpdatePorts() { if (getPort().isFinished()) { - was_query_canceled = true; query_executor->finish(); } } diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index da39b5d0046..dbfa0156331 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -39,7 +39,6 @@ protected: void onCancel() override; private: - std::atomic was_query_canceled = false; bool was_query_sent = false; bool add_aggregation_info = false; RemoteQueryExecutorPtr query_executor; From 2516c2ea284d85d0cdeaca33f781e54f1813d806 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 24 Oct 2023 20:46:44 +0000 Subject: [PATCH 0106/1097] Fix clickhouse-local exit on bad send_logs_level setting --- src/Client/ClientBase.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index a350654cdda..1382d3830f5 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1810,7 +1810,12 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin { const auto * logs_level_field = set_query->changes.tryGet(std::string_view{"send_logs_level"}); if (logs_level_field) - updateLoggerLevel(logs_level_field->safeGet()); + { + auto logs_level = logs_level_field->safeGet(); + /// Check that setting value is correct before updating logger level. + SettingFieldLogsLevelTraits::fromString(logs_level); + updateLoggerLevel(logs_level); + } } if (const auto * create_user_query = parsed_query->as()) From 2606b60e6649915d71dbd70b2ee7a80dc371a6d7 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 25 Oct 2023 04:17:35 +0000 Subject: [PATCH 0107/1097] Fix REPLICA_ALREADY_EXISTS for ReplicatedMergeTree --- src/Storages/StorageReplicatedMergeTree.cpp | 84 ++++++++++++++++++++- 1 file changed, 81 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 4de7ecb7fd9..91b15f96297 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -861,6 +861,84 @@ void StorageReplicatedMergeTree::createReplica(const StorageMetadataPtr & metada LOG_DEBUG(log, "Creating replica {}", replica_path); + const String local_metadata = ReplicatedMergeTreeTableMetadata(*this, metadata_snapshot).toString(); + const String local_columns = metadata_snapshot->getColumns().toString(); + const String local_metadata_version = toString(metadata_snapshot->getMetadataVersion()); + + /// It is possible for the replica to fail after creating ZK nodes without saving local metadata. + /// Because of that we need to check whether the replica exists and is newly created. + /// For this we check that all nodes exist, the metadata of the table is the same, and other nodes are not modified. 
+ + std::vector paths_exists = { + replica_path, + replica_path + "/host", + replica_path + "/log_pointer", + replica_path + "/queue", + replica_path + "/parts", + replica_path + "/flags", + replica_path + "/is_lost", + replica_path + "/metadata", + replica_path + "/columns", + replica_path + "/metadata_version", + replica_path + "/mutation_pointer", + replica_path + "/min_unprocessed_insert_time", + replica_path + "/max_processed_insert_time", + replica_path + "/mutation_pointer" + }; + + auto response_exists = zookeeper->tryGet(paths_exists); + size_t response_num = 0; + + if (response_exists[response_num++].error == Coordination::Error::ZOK) + { + bool all_nodes_exist = true; + + for (size_t i = 0; i < response_exists.size(); ++i) + { + if (response_exists[i].error != Coordination::Error::ZOK) + { + all_nodes_exist = false; + break; + } + } + + if (all_nodes_exist) + { + const auto & zk_host = response_exists[response_num++].data; + const auto & zk_log_pointer = response_exists[response_num++].data; + const auto & zk_queue = response_exists[response_num++].data; + const auto & zk_parts = response_exists[response_num++].data; + const auto & zk_flags = response_exists[response_num++].data; + const auto & zk_is_lost = response_exists[response_num++].data; + const auto & zk_metadata = response_exists[response_num++].data; + const auto & zk_columns = response_exists[response_num++].data; + const auto & zk_metadata_version = response_exists[response_num++].data; + const auto & zk_min_unprocessed_insert_time = response_exists[response_num++].data; + const auto & zk_max_processed_insert_time = response_exists[response_num++].data; + const auto & zk_mutation_pointer = response_exists[response_num++].data; + + if (zk_host.empty() && + zk_log_pointer.empty() && + zk_queue.empty() && + zk_parts.empty() && + zk_flags.empty() && + (zk_is_lost == "0" || zk_is_lost == "1") && + zk_metadata == local_metadata && + zk_columns == local_columns && + zk_metadata_version == local_metadata_version && + zk_mutation_pointer.empty() && + zk_min_unprocessed_insert_time.empty() && + zk_max_processed_insert_time.empty() && + zk_mutation_pointer.empty()) + { + LOG_DEBUG(log, "Empty replica {} exists, will use it", replica_path); + return; + } + } + + throw Exception(ErrorCodes::REPLICA_ALREADY_EXISTS, "Replica {} already exists", replica_path); + } + Coordination::Error code; do @@ -892,11 +970,11 @@ void StorageReplicatedMergeTree::createReplica(const StorageMetadataPtr & metada zkutil::CreateMode::Persistent)); ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/is_lost", is_lost_value, zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata", ReplicatedMergeTreeTableMetadata(*this, metadata_snapshot).toString(), + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata", local_metadata, zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/columns", metadata_snapshot->getColumns().toString(), + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/columns", local_columns, zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata_version", toString(metadata_snapshot->getMetadataVersion()), + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata_version", local_metadata_version, zkutil::CreateMode::Persistent)); /// The following 3 nodes were added in version 1.1.xxx, so we create them here, not in createNewZooKeeperNodes() From 
efbcac4e600430262e4999c2a799e92dc6e5c4c8 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 25 Oct 2023 10:21:06 +0200 Subject: [PATCH 0108/1097] Bug fix explain ast with parameterized view --- src/Interpreters/executeQuery.cpp | 6 ++++++ .../02903_parameterized_view_explain_ast.reference | 12 ++++++++++++ .../02903_parameterized_view_explain_ast.sql | 3 +++ 3 files changed, 21 insertions(+) create mode 100644 tests/queries/0_stateless/02903_parameterized_view_explain_ast.reference create mode 100644 tests/queries/0_stateless/02903_parameterized_view_explain_ast.sql diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index decda4c62f9..557f80e8d70 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -730,6 +730,12 @@ static std::tuple executeQueryImpl( bool is_create_parameterized_view = false; if (const auto * create_query = ast->as()) is_create_parameterized_view = create_query->isParameterizedView(); + else if (const auto * explain_query = ast->as()) + { + assert(explain_query->children.size() => 1); + if (const auto * create_of_explain_query = explain_query->children[0]->as()) + is_create_parameterized_view = create_of_explain_query->isParameterizedView(); + } /// Replace ASTQueryParameter with ASTLiteral for prepared statements. /// Even if we don't have parameters in query_context, check that AST doesn't have unknown parameters diff --git a/tests/queries/0_stateless/02903_parameterized_view_explain_ast.reference b/tests/queries/0_stateless/02903_parameterized_view_explain_ast.reference new file mode 100644 index 00000000000..6ee8d0c3d23 --- /dev/null +++ b/tests/queries/0_stateless/02903_parameterized_view_explain_ast.reference @@ -0,0 +1,12 @@ +CreateQuery numbers_pv (children 2) + Identifier numbers_pv + SelectWithUnionQuery (children 1) + ExpressionList (children 1) + SelectQuery (children 3) + ExpressionList (children 1) + Asterisk + TablesInSelectQuery (children 1) + TablesInSelectQueryElement (children 1) + TableExpression (children 1) + TableIdentifier numbers + QueryParameter amount:UInt8 diff --git a/tests/queries/0_stateless/02903_parameterized_view_explain_ast.sql b/tests/queries/0_stateless/02903_parameterized_view_explain_ast.sql new file mode 100644 index 00000000000..6af6dab2f4e --- /dev/null +++ b/tests/queries/0_stateless/02903_parameterized_view_explain_ast.sql @@ -0,0 +1,3 @@ +EXPLAIN AST +CREATE VIEW numbers_pv AS +SELECT * FROM numbers LIMIT {amount:UInt8}; \ No newline at end of file From d6c63e07ce97932bdfcf7baa5ab76bd9c1fee85b Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 25 Oct 2023 10:23:22 +0200 Subject: [PATCH 0109/1097] Fixed expression --- src/Interpreters/executeQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 557f80e8d70..27fbce5311c 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -732,7 +732,7 @@ static std::tuple executeQueryImpl( is_create_parameterized_view = create_query->isParameterizedView(); else if (const auto * explain_query = ast->as()) { - assert(explain_query->children.size() => 1); + assert(explain_query->children.size() >= 1); if (const auto * create_of_explain_query = explain_query->children[0]->as()) is_create_parameterized_view = create_of_explain_query->isParameterizedView(); } From 3c690337eccba39587c7cf4f6206d951c64414de Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 26 Oct 2023 03:17:30 
+0000 Subject: [PATCH 0110/1097] Improvements --- src/Core/Settings.h | 2 + src/Interpreters/InterpreterCreateQuery.cpp | 8 ++ src/Storages/StorageReplicatedMergeTree.cpp | 96 ++++++++++--------- ...ated_merge_tree_creation_failure.reference | 4 + ..._replicated_merge_tree_creation_failure.sh | 38 ++++++++ 5 files changed, 102 insertions(+), 46 deletions(-) create mode 100644 tests/queries/0_stateless/02888_replicated_merge_tree_creation_failure.reference create mode 100755 tests/queries/0_stateless/02888_replicated_merge_tree_creation_failure.sh diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f2b55fbcd0b..955ad815e00 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -607,6 +607,8 @@ class IColumn; M(Bool, mutations_execute_subqueries_on_initiator, false, "If true scalar subqueries are executed on initiator and replaced to literals in UPDATE and DELETE queries", 0) \ M(UInt64, mutations_max_literal_size_to_replace, 16384, "The maximum size of serialized literal in bytes to replace in UPDATE and DELETE queries", 0) \ \ + M(Bool, create_replicated_merge_tree_fault_injection, false, "If true, the creation of table will be aborted after creating metadata in ZooKeeper", 0) \ + \ M(Bool, use_query_cache, false, "Enable the query cache", 0) \ M(Bool, enable_writes_to_query_cache, true, "Enable storing results of SELECT queries in the query cache", 0) \ M(Bool, enable_reads_from_query_cache, true, "Enable reading results of SELECT queries from the query cache", 0) \ diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a0635f18214..3da6c9fa264 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -104,6 +104,7 @@ namespace ErrorCodes extern const int UNKNOWN_STORAGE; extern const int SYNTAX_ERROR; extern const int SUPPORT_IS_DISABLED; + extern const int ABORTED; } namespace fs = std::filesystem; @@ -1442,6 +1443,13 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, "ATTACH ... FROM ... query is not supported for {} table engine, " "because such tables do not store any data on disk. Use CREATE instead.", res->getName()); + if (getContext()->getSettingsRef().create_replicated_merge_tree_fault_injection) + { + bool is_replicated_storage = typeid_cast(res.get()) != nullptr; + if (is_replicated_storage) + throw Exception(ErrorCodes::ABORTED, "Shutdown is called for table"); + } + database->createTable(getContext(), create.getTable(), res, query_ptr); /// Move table data to the proper place. 
Wo do not move data earlier to avoid situations diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 91b15f96297..0781684b7b7 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -19,6 +19,8 @@ #include #include +#include + #include #include @@ -834,6 +836,9 @@ bool StorageReplicatedMergeTree::createTableIfNotExists(const StorageMetadataPtr ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/mutation_pointer", "", zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/creator_uuid", toString(ServerUUID::get()), + zkutil::CreateMode::Persistent)); + Coordination::Responses responses; auto code = zookeeper->tryMulti(ops, responses); if (code == Coordination::Error::ZNODEEXISTS) @@ -864,13 +869,13 @@ void StorageReplicatedMergeTree::createReplica(const StorageMetadataPtr & metada const String local_metadata = ReplicatedMergeTreeTableMetadata(*this, metadata_snapshot).toString(); const String local_columns = metadata_snapshot->getColumns().toString(); const String local_metadata_version = toString(metadata_snapshot->getMetadataVersion()); + const String creator_uuid = toString(ServerUUID::get()); /// It is possible for the replica to fail after creating ZK nodes without saving local metadata. /// Because of that we need to check whether the replica exists and is newly created. /// For this we check that all nodes exist, the metadata of the table is the same, and other nodes are not modified. std::vector paths_exists = { - replica_path, replica_path + "/host", replica_path + "/log_pointer", replica_path + "/queue", @@ -880,63 +885,59 @@ void StorageReplicatedMergeTree::createReplica(const StorageMetadataPtr & metada replica_path + "/metadata", replica_path + "/columns", replica_path + "/metadata_version", - replica_path + "/mutation_pointer", replica_path + "/min_unprocessed_insert_time", replica_path + "/max_processed_insert_time", - replica_path + "/mutation_pointer" + replica_path + "/mutation_pointer", + replica_path + "/creator_uuid" }; auto response_exists = zookeeper->tryGet(paths_exists); - size_t response_num = 0; + bool all_nodes_exist = true; - if (response_exists[response_num++].error == Coordination::Error::ZOK) + for (size_t i = 0; i < response_exists.size(); ++i) { - bool all_nodes_exist = true; - - for (size_t i = 0; i < response_exists.size(); ++i) + if (response_exists[i].error != Coordination::Error::ZOK) { - if (response_exists[i].error != Coordination::Error::ZOK) - { - all_nodes_exist = false; - break; - } + all_nodes_exist = false; + break; } + } - if (all_nodes_exist) + if (all_nodes_exist) + { + size_t response_num = 0; + + const auto & zk_host = response_exists[response_num++].data; + const auto & zk_log_pointer = response_exists[response_num++].data; + const auto & zk_queue = response_exists[response_num++].data; + const auto & zk_parts = response_exists[response_num++].data; + const auto & zk_flags = response_exists[response_num++].data; + const auto & zk_is_lost = response_exists[response_num++].data; + const auto & zk_metadata = response_exists[response_num++].data; + const auto & zk_columns = response_exists[response_num++].data; + const auto & zk_metadata_version = response_exists[response_num++].data; + const auto & zk_min_unprocessed_insert_time = response_exists[response_num++].data; + const auto & zk_max_processed_insert_time = response_exists[response_num++].data; + const auto & zk_mutation_pointer 
= response_exists[response_num++].data; + const auto & zk_creator_uuid = response_exists[response_num++].data; + + if (zk_host.empty() && + zk_log_pointer.empty() && + zk_queue.empty() && + zk_parts.empty() && + zk_flags.empty() && + (zk_is_lost == "0" || zk_is_lost == "1") && + zk_metadata == local_metadata && + zk_columns == local_columns && + zk_metadata_version == local_metadata_version && + zk_min_unprocessed_insert_time.empty() && + zk_max_processed_insert_time.empty() && + zk_mutation_pointer.empty() && + zk_creator_uuid == creator_uuid) { - const auto & zk_host = response_exists[response_num++].data; - const auto & zk_log_pointer = response_exists[response_num++].data; - const auto & zk_queue = response_exists[response_num++].data; - const auto & zk_parts = response_exists[response_num++].data; - const auto & zk_flags = response_exists[response_num++].data; - const auto & zk_is_lost = response_exists[response_num++].data; - const auto & zk_metadata = response_exists[response_num++].data; - const auto & zk_columns = response_exists[response_num++].data; - const auto & zk_metadata_version = response_exists[response_num++].data; - const auto & zk_min_unprocessed_insert_time = response_exists[response_num++].data; - const auto & zk_max_processed_insert_time = response_exists[response_num++].data; - const auto & zk_mutation_pointer = response_exists[response_num++].data; - - if (zk_host.empty() && - zk_log_pointer.empty() && - zk_queue.empty() && - zk_parts.empty() && - zk_flags.empty() && - (zk_is_lost == "0" || zk_is_lost == "1") && - zk_metadata == local_metadata && - zk_columns == local_columns && - zk_metadata_version == local_metadata_version && - zk_mutation_pointer.empty() && - zk_min_unprocessed_insert_time.empty() && - zk_max_processed_insert_time.empty() && - zk_mutation_pointer.empty()) - { - LOG_DEBUG(log, "Empty replica {} exists, will use it", replica_path); - return; - } + LOG_DEBUG(log, "Empty replica {} exists, will use it", replica_path); + return; } - - throw Exception(ErrorCodes::REPLICA_ALREADY_EXISTS, "Replica {} already exists", replica_path); } Coordination::Error code; @@ -985,6 +986,9 @@ void StorageReplicatedMergeTree::createReplica(const StorageMetadataPtr & metada ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/mutation_pointer", "", zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/creator_uuid", creator_uuid, + zkutil::CreateMode::Persistent)); + /// Check version of /replicas to see if there are any replicas created at the same moment of time. ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/replicas", "last added replica: " + replica_name, replicas_stat.version)); diff --git a/tests/queries/0_stateless/02888_replicated_merge_tree_creation_failure.reference b/tests/queries/0_stateless/02888_replicated_merge_tree_creation_failure.reference new file mode 100644 index 00000000000..487b1165348 --- /dev/null +++ b/tests/queries/0_stateless/02888_replicated_merge_tree_creation_failure.reference @@ -0,0 +1,4 @@ +2 +2 +2 +2 diff --git a/tests/queries/0_stateless/02888_replicated_merge_tree_creation_failure.sh b/tests/queries/0_stateless/02888_replicated_merge_tree_creation_failure.sh new file mode 100755 index 00000000000..0c5705c7a35 --- /dev/null +++ b/tests/queries/0_stateless/02888_replicated_merge_tree_creation_failure.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Tags: zookeeper + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + + + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS test_exception_replicated SYNC" + +#### 1 - There is only one replica + +${CLICKHOUSE_CLIENT} --create_replicated_merge_tree_fault_injection=1 \ + -q "CREATE TABLE test_exception_replicated (date Date) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/recreate', 'r1') ORDER BY date" 2>&1 | grep -c "ABORT" + +# We will see that the replica is empty and throw the same ABORT exception as before +${CLICKHOUSE_CLIENT} --create_replicated_merge_tree_fault_injection=1 \ + -q "CREATE TABLE test_exception_replicated (date Date) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/recreate', 'r1') ORDER BY date" 2>&1 | grep -c "ABORT" + +# We will succeed +${CLICKHOUSE_CLIENT} \ + -q "CREATE TABLE test_exception_replicated (date Date) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/recreate', 'r1') ORDER BY date" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE test_exception_replicated SYNC" + +#### 2 - There are two replicas + +${CLICKHOUSE_CLIENT} --create_replicated_merge_tree_fault_injection=1 \ + -q "CREATE TABLE test_exception_replicated (date Date) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/recreate', 'r1') ORDER BY date" 2>&1 | grep -c "ABORT" +${CLICKHOUSE_CLIENT} --create_replicated_merge_tree_fault_injection=1 \ + -q "CREATE TABLE test_exception_replicated (date Date) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/recreate', 'r2') ORDER BY date" 2>&1 | grep -c "ABORT" + +# We will succeed +${CLICKHOUSE_CLIENT} \ + -q "CREATE TABLE test_exception_replicated (date Date) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/recreate', 'r1') ORDER BY date" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE test_exception_replicated SYNC" \ No newline at end of file From 940d099e84d92eaaacaa96682c5a94b26f7a782c Mon Sep 17 00:00:00 2001 From: Justin de Guzman Date: Fri, 27 Oct 2023 16:50:34 -0700 Subject: [PATCH 0111/1097] Set correct max_block_size value in docs --- docs/en/operations/settings/settings.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index ccf290c8e20..60eda45ab22 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -731,11 +731,13 @@ Default value: LZ4. ## max_block_size {#setting-max_block_size} -In ClickHouse, data is processed by blocks (sets of column parts). The internal processing cycles for a single block are efficient enough, but there are noticeable expenditures on each block. The `max_block_size` setting is a recommendation for what size of the block (in a count of rows) to load from tables. The block size shouldn’t be too small, so that the expenditures on each block are still noticeable, but not too large so that the query with LIMIT that is completed after the first block is processed quickly. The goal is to avoid consuming too much memory when extracting a large number of columns in multiple threads and to preserve at least some cache locality. +In ClickHouse, data is processed by blocks, which are sets of column parts. The internal processing cycles for a single block are efficient but there are noticeable costs when processing each block. -Default value: 65,536. 
+The `max_block_size` setting indicates the recommended maximum number of rows to include in a single block when loading data from tables. Blocks the size of `max_block_size` are not always loaded from the table: if ClickHouse determines that less data needs to be retrieved, a smaller block is processed. -Blocks the size of `max_block_size` are not always loaded from the table. If it is obvious that less data needs to be retrieved, a smaller block is processed. +The block size should not be too small to avoid noticeable costs when processing each block. It should also not be too large to ensure that queries with a LIMIT clause execute quickly after processing the first block. When setting `max_block_size`, the goal should be to avoid consuming too much memory when extracting a large number of columns in multiple threads and to preserve at least some cache locality. + +Default value: `65,409` ## preferred_block_size_bytes {#preferred-block-size-bytes} From 70e3dd808cc3f087504892d18a9e61eb6f948151 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 29 Oct 2023 02:07:24 +0100 Subject: [PATCH 0112/1097] Granular code coverage with introspection --- CMakeLists.txt | 9 -- base/base/CMakeLists.txt | 2 + base/base/coverage.cpp | 106 ++++++++++++++++++- base/base/coverage.h | 6 ++ base/glibc-compatibility/memcpy/memcpy.cpp | 1 + base/glibc-compatibility/memcpy/memcpy.h | 2 +- cmake/sanitize.cmake | 18 ++++ contrib/CMakeLists.txt | 9 -- contrib/google-protobuf-cmake/CMakeLists.txt | 32 ------ contrib/libcxx-cmake/CMakeLists.txt | 2 - programs/CMakeLists.txt | 2 + src/CMakeLists.txt | 5 +- src/Functions/coverage.cpp | 91 ++++++++++++++++ src/Interpreters/InterpreterSystemQuery.cpp | 8 ++ src/Parsers/ASTSystemQuery.h | 1 + src/Parsers/ParserSystemQuery.cpp | 16 +-- 16 files changed, 244 insertions(+), 66 deletions(-) create mode 100644 src/Functions/coverage.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index a5b94efefc5..d259b105a0a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -286,9 +286,6 @@ set (CMAKE_C_STANDARD 11) set (CMAKE_C_EXTENSIONS ON) # required by most contribs written in C set (CMAKE_C_STANDARD_REQUIRED ON) -# Compiler-specific coverage flags e.g. -fcoverage-mapping -option(WITH_COVERAGE "Profile the resulting binary/binaries" OFF) - if (COMPILER_CLANG) # Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure. 
# See https://reviews.llvm.org/D112921 @@ -304,12 +301,6 @@ if (COMPILER_CLANG) set(BRANCHES_WITHIN_32B_BOUNDARIES "-mbranches-within-32B-boundaries") set(COMPILER_FLAGS "${COMPILER_FLAGS} ${BRANCHES_WITHIN_32B_BOUNDARIES}") endif() - - if (WITH_COVERAGE) - set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping") - # If we want to disable coverage for specific translation units - set(WITHOUT_COVERAGE "-fno-profile-instr-generate -fno-coverage-mapping") - endif() endif () set (COMPILER_FLAGS "${COMPILER_FLAGS}") diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 8ab3c8a0711..f9bf413a6c8 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -1,3 +1,5 @@ +add_compile_options($<$,$>:${COVERAGE_FLAGS}>) + if (USE_CLANG_TIDY) set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}") endif () diff --git a/base/base/coverage.cpp b/base/base/coverage.cpp index 1027638be3d..60eb6fcac72 100644 --- a/base/base/coverage.cpp +++ b/base/base/coverage.cpp @@ -1,11 +1,15 @@ #include "coverage.h" -#if WITH_COVERAGE - #pragma GCC diagnostic ignored "-Wreserved-identifier" -# include -# include + +/// WITH_COVERAGE enables the default implementation of code coverage, +/// that dumps a map to the filesystem. + +#if WITH_COVERAGE + +#include +#include # if defined(__clang__) @@ -31,3 +35,97 @@ void dumpCoverageReportIfPossible() #endif } + + +/// SANITIZE_COVERAGE enables code instrumentation, +/// but leaves the callbacks implementation to us, +/// which we use to calculate coverage on a per-test basis +/// and to write it to system tables. + +#if defined(SANITIZE_COVERAGE) + +namespace +{ + bool initialized = false; + + uint32_t * guards_start = nullptr; + uint32_t * guards_end = nullptr; + size_t coverage_array_size = 0; + + uintptr_t * coverage_array = nullptr; +} + +extern "C" +{ + +/// This is called at least once for every DSO for initialization. +/// But we will use it only for the main DSO. +void __sanitizer_cov_trace_pc_guard_init(uint32_t * start, uint32_t * stop) +{ + if (initialized) + return; + initialized = true; + + /// The function can be called multiple times, but we need to initialize only once. + if (start == stop || *start) + return; + + guards_start = start; + guards_end = stop; + coverage_array_size = stop - start; + + /// Note: we will leak this. + coverage_array = static_cast(malloc(sizeof(uintptr_t) * coverage_array_size)); + + resetCoverage(); +} + +/// This is called at every basic block / edge, etc. +void __sanitizer_cov_trace_pc_guard(uint32_t * guard) +{ + /// Duplicate the guard check. + if (!*guard) + return; + *guard = 0; + + /// If you set *guard to 0 this code will not be called again for this edge. + /// Now we can get the PC and do whatever you want: + /// - store it somewhere or symbolize it and print right away. + /// The values of `*guard` are as you set them in + /// __sanitizer_cov_trace_pc_guard_init and so you can make them consecutive + /// and use them to dereference an array or a bit vector. + void * pc = __builtin_return_address(0); + + coverage_array[guard - guards_start] = reinterpret_cast(pc); +} + +} + +__attribute__((no_sanitize("coverage"))) std::span getCoverage() +{ + return {coverage_array, coverage_array_size}; +} + +__attribute__((no_sanitize("coverage"))) void resetCoverage() +{ + memset(coverage_array, 0, coverage_array_size * sizeof(*coverage_array)); + + /// The guard defines whether the __sanitizer_cov_trace_pc_guard should be called. 
+ /// For example, you can unset it after first invocation to prevent excessive work. + /// Initially set all the guards to 1 to enable callbacks. + for (uint32_t * x = guards_start; x < guards_end; ++x) + *x = 1; +} + +#else + +std::span getCoverage() +{ + return {}; +} + +void resetCoverage() +{ +} + +#endif diff --git a/base/base/coverage.h b/base/base/coverage.h index 4a57528b0ce..b6664bec223 100644 --- a/base/base/coverage.h +++ b/base/base/coverage.h @@ -1,5 +1,8 @@ #pragma once +#include +#include + /// Flush coverage report to file, depending on coverage system /// proposed by compiler (llvm for clang and gcov for gcc). /// @@ -7,3 +10,6 @@ /// Thread safe (use exclusive lock). /// Idempotent, may be called multiple times. void dumpCoverageReportIfPossible(); + +std::span getCoverage(); +void resetCoverage(); diff --git a/base/glibc-compatibility/memcpy/memcpy.cpp b/base/glibc-compatibility/memcpy/memcpy.cpp index ec43a2c3649..8bab35934d3 100644 --- a/base/glibc-compatibility/memcpy/memcpy.cpp +++ b/base/glibc-compatibility/memcpy/memcpy.cpp @@ -1,5 +1,6 @@ #include "memcpy.h" +__attribute__((no_sanitize("coverage"))) extern "C" void * memcpy(void * __restrict dst, const void * __restrict src, size_t size) { return inline_memcpy(dst, src, size); diff --git a/base/glibc-compatibility/memcpy/memcpy.h b/base/glibc-compatibility/memcpy/memcpy.h index 0930dfb5c67..86439dda061 100644 --- a/base/glibc-compatibility/memcpy/memcpy.h +++ b/base/glibc-compatibility/memcpy/memcpy.h @@ -93,7 +93,7 @@ * See https://habr.com/en/company/yandex/blog/457612/ */ - +__attribute__((no_sanitize("coverage"))) static inline void * inline_memcpy(void * __restrict dst_, const void * __restrict src_, size_t size) { /// We will use pointer arithmetic, so char pointer will be used. diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index f17283774eb..0c901f1aa36 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -58,3 +58,21 @@ if (SANITIZE) message (FATAL_ERROR "Unknown sanitizer type: ${SANITIZE}") endif () endif() + +# Default coverage instrumentation (dumping the coverage map on exit) +option(WITH_COVERAGE "Instrumentation for code coverage with default implementation" OFF) + +if (WITH_COVERAGE) + message (INFORMATION "Enabled instrumentation for code coverage") + set(COVERAGE_FLAGS "-fprofile-instr-generate -fcoverage-mapping") +endif() + +option (SANITIZE_COVERAGE "Instrumentation for code coverage with custom callbacks" OFF) + +if (SANITIZE_COVERAGE) + message (INFORMATION "Enabled instrumentation for code coverage") + add_definitions(-DSANITIZE_COVERAGE=1) + set (COVERAGE_FLAGS "-fsanitize-coverage=trace-pc-guard") +endif() + +set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 390b0241e7d..fa97e59eefc 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -3,15 +3,6 @@ set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") -if (WITH_COVERAGE) - set (WITHOUT_COVERAGE_LIST ${WITHOUT_COVERAGE}) - separate_arguments(WITHOUT_COVERAGE_LIST) - # disable coverage for contib files and build with optimisations - if (COMPILER_CLANG) - add_compile_options(-O3 -DNDEBUG -finline-functions -finline-hint-functions ${WITHOUT_COVERAGE_LIST}) - endif() -endif() - if (SANITIZE STREQUAL "undefined") # 3rd-party libraries usually not intended to work with UBSan. 
add_compile_options(-fno-sanitize=undefined) diff --git a/contrib/google-protobuf-cmake/CMakeLists.txt b/contrib/google-protobuf-cmake/CMakeLists.txt index 268f0fbe0e4..fbb7d6ea018 100644 --- a/contrib/google-protobuf-cmake/CMakeLists.txt +++ b/contrib/google-protobuf-cmake/CMakeLists.txt @@ -278,38 +278,6 @@ else () COMMAND_ECHO STDOUT) endif () -# add_custom_command ( -# OUTPUT ${PROTOC_BUILD_DIR} -# COMMAND mkdir -p ${PROTOC_BUILD_DIR}) -# -# add_custom_command ( -# OUTPUT "${PROTOC_BUILD_DIR}/CMakeCache.txt" -# -# COMMAND ${CMAKE_COMMAND} -# -G"${CMAKE_GENERATOR}" -# -DCMAKE_MAKE_PROGRAM="${CMAKE_MAKE_PROGRAM}" -# -DCMAKE_C_COMPILER="${CMAKE_C_COMPILER}" -# -DCMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}" -# -Dprotobuf_BUILD_TESTS=0 -# -Dprotobuf_BUILD_CONFORMANCE=0 -# -Dprotobuf_BUILD_EXAMPLES=0 -# -Dprotobuf_BUILD_PROTOC_BINARIES=1 -# "${protobuf_source_dir}/cmake" -# -# DEPENDS "${PROTOC_BUILD_DIR}" -# WORKING_DIRECTORY "${PROTOC_BUILD_DIR}" -# COMMENT "Configuring 'protoc' for host architecture." -# USES_TERMINAL) -# -# add_custom_command ( -# OUTPUT "${PROTOC_BUILD_DIR}/protoc" -# COMMAND ${CMAKE_COMMAND} --build "${PROTOC_BUILD_DIR}" -# DEPENDS "${PROTOC_BUILD_DIR}/CMakeCache.txt" -# COMMENT "Building 'protoc' for host architecture." -# USES_TERMINAL) -# -# add_custom_target (protoc-host DEPENDS "${PROTOC_BUILD_DIR}/protoc") - add_executable(protoc IMPORTED GLOBAL) set_target_properties (protoc PROPERTIES IMPORTED_LOCATION "${PROTOC_BUILD_DIR}/protoc") add_dependencies(protoc "${PROTOC_BUILD_DIR}/protoc") diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index b7e59e2c9a3..c77d5d8319e 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -1,5 +1,3 @@ -include(CheckCXXCompilerFlag) - set(LIBCXX_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/libcxx") set(SRCS diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index eb4a898d472..fce6894ed11 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -1,3 +1,5 @@ +add_compile_options($<$,$>:${COVERAGE_FLAGS}>) + if (USE_CLANG_TIDY) set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}") endif () diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d2985665db3..f88a6cff6c0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,3 +1,5 @@ +add_compile_options($<$,$>:${COVERAGE_FLAGS}>) + if (USE_INCLUDE_WHAT_YOU_USE) set (CMAKE_CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH}) endif () @@ -282,7 +284,8 @@ set_source_files_properties( Common/Elf.cpp Common/Dwarf.cpp Common/SymbolIndex.cpp - PROPERTIES COMPILE_FLAGS "-O2 ${WITHOUT_COVERAGE}") + Common/ThreadFuzzer.cpp + PROPERTIES COMPILE_FLAGS "-O2 ${WITHOUT_COVERAGE_FLAGS}") target_link_libraries (clickhouse_common_io PRIVATE diff --git a/src/Functions/coverage.cpp b/src/Functions/coverage.cpp new file mode 100644 index 00000000000..1825e6aa826 --- /dev/null +++ b/src/Functions/coverage.cpp @@ -0,0 +1,91 @@ +#if defined(SANITIZE_COVERAGE) + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace +{ + +/** If ClickHouse is build with coverage instrumentation, returns an array + * of currently accumulated unique code addresses. 
+ */ +class FunctionCoverage : public IFunction +{ +public: + static constexpr auto name = "coverage"; + + String getName() const override + { + return name; + } + + explicit FunctionCoverage() + { + } + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override + { + return false; + } + + size_t getNumberOfArguments() const override + { + return 0; + } + + bool isDeterministic() const override + { + return false; + } + + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + { + return std::make_shared(std::make_shared()); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override + { + auto coverage_table = getCoverage(); + + auto column_addresses = ColumnUInt64::create(); + auto & data = column_addresses->getData(); + + for (auto ptr : coverage_table) + if (ptr) + data.push_back(ptr); + + auto column_array = ColumnArray::create( + std::move(column_addresses), + ColumnArray::ColumnOffsets::create(1, data.size())); + + return ColumnConst::create(std::move(column_array), input_rows_count); + } +}; + +} + +REGISTER_FUNCTION(Coverage) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 07a1ae7d170..4e1d32bd3cb 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -690,6 +691,12 @@ BlockIO InterpreterSystemQuery::execute() FailPointInjection::disableFailPoint(query.fail_point_name); break; } + case Type::RESET_COVERAGE: + { + getContext()->checkAccess(AccessType::SYSTEM); + resetCoverage(); + break; + } default: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown type of SYSTEM query"); } @@ -1299,6 +1306,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() case Type::START_THREAD_FUZZER: case Type::ENABLE_FAILPOINT: case Type::DISABLE_FAILPOINT: + case Type::RESET_COVERAGE: case Type::UNKNOWN: case Type::END: break; } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index cc06e0fdcb5..5f7ba5be330 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -86,6 +86,7 @@ public: START_PULLING_REPLICATION_LOG, STOP_CLEANUP, START_CLEANUP, + RESET_COVERAGE, END }; diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index a26fdc1396b..f0fc38d6adb 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -453,14 +453,14 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & } case Type::DROP_FORMAT_SCHEMA_CACHE: { - if (ParserKeyword{"FOR"}.ignore(pos, expected)) - { - if (ParserKeyword{"Protobuf"}.ignore(pos, expected)) - res->schema_cache_format = "Protobuf"; - else - return false; - } - break; + if (ParserKeyword{"FOR"}.ignore(pos, expected)) + { + if (ParserKeyword{"Protobuf"}.ignore(pos, expected)) + res->schema_cache_format = "Protobuf"; + else + return false; + } + break; } case Type::UNFREEZE: { From 3142921bb4dcb8b7169f7d32a05110c9a5baa351 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 29 Oct 2023 11:15:11 +0100 Subject: [PATCH 0113/1097] Taming query profiler --- src/Common/ProfileEvents.cpp | 1 + src/Common/QueryProfiler.cpp | 12 ++++++++++++ 2 files 
changed, 13 insertions(+) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index f4d7242f70a..aadbc8b2471 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -316,6 +316,7 @@ The server successfully detected this situation and will download merged part fr \ M(CannotWriteToWriteBufferDiscard, "Number of stack traces dropped by query profiler or signal handler because pipe is full or cannot write to pipe.") \ M(QueryProfilerSignalOverruns, "Number of times we drop processing of a query profiler signal due to overrun plus the number of signals that OS has not delivered due to overrun.") \ + M(QueryProfilerConcurrencyOverruns, "Number of times we drop processing of a query profiler signal due to too many concurrent query profilers in other threads, which may indicate overload.") \ M(QueryProfilerRuns, "Number of times QueryProfiler had been run.") \ \ M(CreatedLogEntryForMerge, "Successfully created log entry to merge parts in ReplicatedMergeTree.") \ diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index dc9f3610513..c656e7f992f 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -22,6 +22,7 @@ namespace CurrentMetrics namespace ProfileEvents { extern const Event QueryProfilerSignalOverruns; + extern const Event QueryProfilerConcurrencyOverruns; extern const Event QueryProfilerRuns; } @@ -40,8 +41,19 @@ namespace /// to ignore delivered signals after timer_delete(). thread_local bool signal_handler_disarmed = true; + /// Don't permit too many threads be busy inside profiler, + /// which could slow down the system in some environments. + std::atomic concurrent_invocations = 0; + void writeTraceInfo(TraceType trace_type, int /* sig */, siginfo_t * info, void * context) { + SCOPE_EXIT({ concurrent_invocations.fetch_sub(1, std::memory_order_relaxed); }); + if (concurrent_invocations.fetch_add(1, std::memory_order_relaxed) > 100) + { + ProfileEvents::incrementNoTrace(ProfileEvents::QueryProfilerConcurrencyOverruns); + return; + } + auto saved_errno = errno; /// We must restore previous value of errno in signal handler. 
#if defined(OS_LINUX) From e15815ee974dafada9ec0de9996d2d29eb26e6cb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 29 Oct 2023 11:32:45 +0100 Subject: [PATCH 0114/1097] Add a test --- ...ry_profiler_concurrency_overruns.reference | 1 + ...907_query_profiler_concurrency_overruns.sh | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.reference create mode 100755 tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.sh diff --git a/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.reference b/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.reference new file mode 100644 index 00000000000..8f75b7cccf2 --- /dev/null +++ b/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.reference @@ -0,0 +1 @@ +1000000000 1 1 diff --git a/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.sh b/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.sh new file mode 100755 index 00000000000..f3f37704e23 --- /dev/null +++ b/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-fasttest, no-cpu-aarch64 + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# The check is probablistic, so make sure that it passes at least sometimes: + +while true +do + ${CLICKHOUSE_CLIENT} -n --query=" + SELECT count() FROM numbers_mt(1000000000) SETTINGS + query_profiler_real_time_period_ns = 1000000, + query_profiler_cpu_time_period_ns = 1000000, + max_threads = 1000; + SELECT anyIf(value, event = 'QueryProfilerRuns') > 0, anyIf(value, event = 'QueryProfilerConcurrencyOverruns') > 0 FROM system.events; + " | tr '\t\n' ' ' | grep '1000000000 1 1' && break + sleep 1 +done From 56de2333f9c69097e57ec2134f6270271f1d5b3b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 29 Oct 2023 16:55:47 +0100 Subject: [PATCH 0115/1097] Add warning --- programs/server/Server.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index af460ccc7d9..854168a2041 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -666,6 +666,10 @@ try global_context->addWarningMessage("Server was built with sanitizer. It will work slowly."); #endif +#if defined(SANITIZE_COVERAGE) || WITH_COVERAGE + global_context->addWarningMessage("Server was built with code coverage. 
It will work slowly."); +#endif + const size_t physical_server_memory = getMemoryAmount(); LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.", From 8e0f48738710f2715f04006db9ddfb0d76c0a865 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 29 Oct 2023 17:21:45 +0100 Subject: [PATCH 0116/1097] Initial support in clickhouse-test --- cmake/sanitize.cmake | 8 +++++++- tests/clickhouse-test | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 0c901f1aa36..7d25a85ef62 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -71,7 +71,13 @@ option (SANITIZE_COVERAGE "Instrumentation for code coverage with custom callbac if (SANITIZE_COVERAGE) message (INFORMATION "Enabled instrumentation for code coverage") - add_definitions(-DSANITIZE_COVERAGE=1) + + # We set this define for whole build to indicate that at least some parts are compiled with coverage. + # And to expose it in system.build_options. + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSANITIZE_COVERAGE=1") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DSANITIZE_COVERAGE=1") + + # But the actual coverage will be enabled on per-library basis: for ClickHouse code, but not for 3rd-party. set (COVERAGE_FLAGS "-fsanitize-coverage=trace-pc-guard") endif() diff --git a/tests/clickhouse-test b/tests/clickhouse-test index cab7d7e79ff..2a4ed865dd5 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1173,6 +1173,16 @@ class TestCase: description_full += result.reason.value description_full += result.description + + if BuildFlags.SANITIZE_COVERAGE in args.build_flags: + coverage = clickhouse_execute( + args, + f"SELECT length(coverage())", + retry_error_codes=True, + ).decode() + + description_full += f" Coverage: {coverage}" + description_full += "\n" if result.status == TestStatus.FAIL and self.testcase_args: @@ -1872,6 +1882,7 @@ class BuildFlags: UNDEFINED = "ubsan" MEMORY = "msan" DEBUG = "debug" + SANITIZE_COVERAGE = "sanitize-coverage" RELEASE = "release" ORDINARY_DATABASE = "ordinary-database" POLYMORPHIC_PARTS = "polymorphic-parts" @@ -1891,6 +1902,8 @@ def collect_build_flags(args): result.append(BuildFlags.UNDEFINED) elif b"-fsanitize=memory" in value: result.append(BuildFlags.MEMORY) + elif b"-DSANITIZE_COVERAGE=1" in value: + result.append(BuildFlags.SANITIZE_COVERAGE) value = clickhouse_execute( args, "SELECT value FROM system.build_options WHERE name = 'BUILD_TYPE'" @@ -2072,6 +2085,8 @@ def reportCoverageFor(args, what, query, permissive=False): return True +# This is high-level coverage on per-component basis (functions, data types, etc.) +# Don't be confused with the code coverage. 
def reportCoverage(args): clickhouse_execute(args, "SYSTEM FLUSH LOGS") From 4288cb3b7895df917f982e03d0d0b55029ecc5cc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 29 Oct 2023 17:43:01 +0100 Subject: [PATCH 0117/1097] Make clickhouse-test to calculate coverage on a per-test basis --- tests/clickhouse-test | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 2a4ed865dd5..e827a596ada 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1175,6 +1175,12 @@ class TestCase: description_full += result.description if BuildFlags.SANITIZE_COVERAGE in args.build_flags: + clickhouse_execute( + args, + f"INSERT INTO system.coverage SELECT '{self.case}', coverage()", + retry_error_codes=True, + ) + coverage = clickhouse_execute( args, f"SELECT length(coverage())", @@ -1241,6 +1247,14 @@ class TestCase: + pattern ) + # We want to calculate per-test code coverage. That's why we reset it before each test. + if BuildFlags.SANITIZE_COVERAGE in args.build_flags: + clickhouse_execute( + args, + "SYSTEM RESET COVERAGE", + retry_error_codes=True, + ) + command = pattern.format(**params) proc = Popen(command, shell=True, env=os.environ) @@ -2349,6 +2363,18 @@ def main(args): print(f"Failed to create databases for tests: {e}") server_died.set() + if BuildFlags.SANITIZE_COVERAGE in args.build_flags: + clickhouse_execute( + args, + """ + CREATE TABLE IF NOT EXISTS system.coverage + ( + test_name String, + coverage Array(UInt64) + ) ENGINE = MergeTree ORDER BY test_name; + """, + ) + total_tests_run = 0 for suite in sorted(os.listdir(base_dir), key=suite_key_func): From 8e6a7fdff09430378a6b13e87ded874524327e3b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 29 Oct 2023 23:43:36 +0100 Subject: [PATCH 0118/1097] Fix f-string --- CMakeLists.txt | 7 ++++--- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- tests/clickhouse-test | 8 +++++++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d259b105a0a..d19bb521c70 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,9 +187,10 @@ if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE") endif () endif() -if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" - OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" - OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL") +if (NOT (SANITIZE_COVERAGE OR WITH_COVERAGE) + AND (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" + OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" + OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")) set (OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT ON) else() set (OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT OFF) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 069ed20c730..705972da8f4 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8795,7 +8795,7 @@ void StorageReplicatedMergeTree::createTableSharedID() const else if (code == Coordination::Error::ZNONODE) /// table completely dropped, we can choose any id we want { id = toString(UUIDHelpers::Nil); - LOG_DEBUG(log, "Table was completely drop, we can use anything as ID (will use {})", id); + LOG_DEBUG(log, "Table was completely dropped, and we can use anything as ID (will use {})", id); } else if (code != Coordination::Error::ZOK) { diff --git a/tests/clickhouse-test b/tests/clickhouse-test index e827a596ada..e5659e8fca4 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1183,7 +1183,7 @@ class TestCase: coverage = 
clickhouse_execute( args, - f"SELECT length(coverage())", + "SELECT length(coverage())", retry_error_codes=True, ).decode() @@ -2375,6 +2375,12 @@ def main(args): """, ) + # Coverage collected at the system startup before running any tests: + clickhouse_execute( + args, + "INSERT INTO system.coverage SELECT '', coverage()", + ) + total_tests_run = 0 for suite in sorted(os.listdir(base_dir), key=suite_key_func): From f85e9138da6990fa95ec1c757cdf6207e6040ddc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 30 Oct 2023 02:31:54 +0300 Subject: [PATCH 0119/1097] Update 02907_query_profiler_concurrency_overruns.sh --- .../0_stateless/02907_query_profiler_concurrency_overruns.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.sh b/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.sh index f3f37704e23..c43889d78b2 100755 --- a/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.sh +++ b/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.sh @@ -10,10 +10,11 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) while true do ${CLICKHOUSE_CLIENT} -n --query=" - SELECT count() FROM numbers_mt(1000000000) SETTINGS + SELECT count() FROM numbers_mt(1000000) SETTINGS query_profiler_real_time_period_ns = 1000000, query_profiler_cpu_time_period_ns = 1000000, - max_threads = 1000; + max_threads = 1000, + max_block_size = 100; SELECT anyIf(value, event = 'QueryProfilerRuns') > 0, anyIf(value, event = 'QueryProfilerConcurrencyOverruns') > 0 FROM system.events; " | tr '\t\n' ' ' | grep '1000000000 1 1' && break sleep 1 From c544a0221010d812c68fb805aee3eed1a252b50c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 30 Oct 2023 02:32:22 +0300 Subject: [PATCH 0120/1097] Update 02907_query_profiler_concurrency_overruns.sh --- .../0_stateless/02907_query_profiler_concurrency_overruns.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.sh b/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.sh index c43889d78b2..7c5e4209124 100755 --- a/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.sh +++ b/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.sh @@ -10,12 +10,12 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) while true do ${CLICKHOUSE_CLIENT} -n --query=" - SELECT count() FROM numbers_mt(1000000) SETTINGS + SELECT count() FROM zeros_mt(1000000) SETTINGS query_profiler_real_time_period_ns = 1000000, query_profiler_cpu_time_period_ns = 1000000, max_threads = 1000, max_block_size = 100; SELECT anyIf(value, event = 'QueryProfilerRuns') > 0, anyIf(value, event = 'QueryProfilerConcurrencyOverruns') > 0 FROM system.events; - " | tr '\t\n' ' ' | grep '1000000000 1 1' && break + " | tr '\t\n' ' ' | grep '1000000 1 1' && break sleep 1 done From e6644c17736a07ad32dcf0a848dec05a94a3505a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 30 Oct 2023 02:32:32 +0300 Subject: [PATCH 0121/1097] Update 02907_query_profiler_concurrency_overruns.reference --- .../02907_query_profiler_concurrency_overruns.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.reference b/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.reference index 8f75b7cccf2..45d53fbec54 100644 --- 
a/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.reference +++ b/tests/queries/0_stateless/02907_query_profiler_concurrency_overruns.reference @@ -1 +1 @@ -1000000000 1 1 +1000000 1 1 From df1e0192680a63fb6870ef226f964921abcea94b Mon Sep 17 00:00:00 2001 From: Han Fei Date: Mon, 30 Oct 2023 00:39:16 +0100 Subject: [PATCH 0122/1097] address comments --- .../mergetree-family/mergetree.md | 6 +-- src/Parsers/ParserAlterQuery.cpp | 1 - src/Storages/AlterCommands.cpp | 2 +- src/Storages/ColumnsDescription.cpp | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 6 +-- src/Storages/Statistic/Estimator.cpp | 32 ++++++++-------- src/Storages/Statistic/Estimator.h | 4 +- src/Storages/Statistic/Statistic.cpp | 30 +++++---------- src/Storages/Statistic/Statistic.h | 38 ++----------------- src/Storages/Statistic/TDigestStatistic.cpp | 38 +++++++++++++++++++ src/Storages/Statistic/TDigestStatistic.h | 28 ++++++++++++++ src/Storages/StatisticsDescription.h | 5 +++ 12 files changed, 110 insertions(+), 81 deletions(-) create mode 100644 src/Storages/Statistic/TDigestStatistic.cpp create mode 100644 src/Storages/Statistic/TDigestStatistic.h diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index d4c5f8084d9..8e8a5ea7850 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -1370,11 +1370,11 @@ ENGINE = MergeTree ORDER BY a ``` -We can also manipulate statistics with `ATLER` statements. +We can also manipulate statistics with `ALTER` statements. ```sql -ATLER TABLE example_table ADD STATISTIC b TYPE tdigest; -ATLER TABLE example_table DROP STATISTIC a TYPE tdigest; +ALTER TABLE example_table ADD STATISTIC b TYPE tdigest; +ALTER TABLE example_table DROP STATISTIC a TYPE tdigest; ``` These lightweight statistics aggregate information about distribution of values in columns. 
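As a rough illustration (a hypothetical sketch reusing `example_table` and the `tdigest` statistic on column `b` from the documentation snippet above, not authoritative syntax), the condition estimator introduced below consults the per-column TDigest to estimate the selectivity of simple range and equality predicates:

```sql
-- Hypothetical usage sketch, assuming `example_table` carries a tdigest statistic on `b`.
-- The estimator would use the column's TDigest (estimateLess) to judge what fraction of
-- rows satisfies the range predicate, instead of falling back to default heuristics.
SELECT count()
FROM example_table
WHERE b < 10;
```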
diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index 0051136fa1f..c616c6e0441 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -352,7 +352,6 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; command->type = ASTAlterCommand::DROP_STATISTIC; - command->detach = false; } else if (s_clear_statistic.ignore(pos, expected)) { diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 6e9e034c2a9..e7885cbace7 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -597,7 +597,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) { throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "Cannot add statistic {} with type {}: this column is not found", statistic_column_name, statistic_type); } - if (metadata.columns.get(statistic_column_name).stat) + if (!if_exists && metadata.columns.get(statistic_column_name).stat) throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "Cannot add statistic {} with type {}: statistic on this column with this type already exists", statistic_column_name, statistic_type); } diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 6261a4c87e2..31d2ab52382 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -61,6 +61,7 @@ bool ColumnDescription::operator==(const ColumnDescription & other) const && type->equals(*other.type) && default_desc == other.default_desc && comment == other.comment + && stat == other.stat && ast_to_str(codec) == ast_to_str(other.codec) && ast_to_str(ttl) == ast_to_str(other.ttl); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 9bcd5620171..5ac8f1830c3 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -459,15 +459,11 @@ ConditionEstimator MergeTreeData::getConditionEstimatorByPredicate(const SelectQ } ASTPtr expression_ast; - Block virtual_columns_block = getBlockWithVirtualPartColumns(parts, true /* one_part */); - // - // Generate valid expressions for filtering - bool valid = VirtualColumnUtils::prepareFilterBlockWithQuery(query_info.query, local_context, virtual_columns_block, expression_ast); ConditionEstimator result; PartitionPruner partition_pruner(metadata_snapshot, query_info, local_context, true /* strict */); - if (partition_pruner.isUseless() && !valid) + if (partition_pruner.isUseless()) { /// Read all partitions. for (const auto & part : parts) diff --git a/src/Storages/Statistic/Estimator.cpp b/src/Storages/Statistic/Estimator.cpp index 031a38a4171..7ecd22358e5 100644 --- a/src/Storages/Statistic/Estimator.cpp +++ b/src/Storages/Statistic/Estimator.cpp @@ -4,34 +4,35 @@ namespace DB { -std::optional ConditionEstimator::extractSingleColumn(const RPNBuilderTreeNode & node) const +/// second return value represents how many columns in the node. 
+static std::pair tryToExtractSingleColumn(const RPNBuilderTreeNode & node) { if (node.isConstant()) { - return std::nullopt; + return {}; } if (!node.isFunction()) { auto column_name = node.getColumnName(); - return {column_name}; + return {column_name, 1}; } auto function_node = node.toFunctionNode(); size_t arguments_size = function_node.getArgumentsSize(); - std::optional result; + std::pair result; for (size_t i = 0; i < arguments_size; ++i) { auto function_argument = function_node.getArgumentAt(i); - auto subresult = extractSingleColumn(function_argument); - if (subresult == std::nullopt) + auto subresult = tryToExtractSingleColumn(function_argument); + if (subresult.second == 0) /// the subnode contains 0 column continue; - else if (subresult == "") - return ""; - else if (result == std::nullopt) + else if (subresult.second > 1) /// the subnode contains more than 1 column + return subresult; + else if (result.second == 0 || result.first == subresult.first) /// subnodes contain same column. result = subresult; - else if (result.value() != subresult.value()) - return ""; + else + return {"", 2}; } return result; } @@ -88,12 +89,13 @@ std::pair ConditionEstimator::extractBinaryOp(const RPNBui Float64 ConditionEstimator::estimateSelectivity(const RPNBuilderTreeNode & node) const { - auto col = extractSingleColumn(node); - if (col == std::nullopt || col == "") + auto result = tryToExtractSingleColumn(node); + if (result.second != 1) { return default_unknown_cond_factor; } - auto it = column_estimators.find(col.value()); + String col = result.first; + auto it = column_estimators.find(col); /// If there the estimator of the column is not found or there are no data at all, /// we use dummy estimation. @@ -107,7 +109,7 @@ Float64 ConditionEstimator::estimateSelectivity(const RPNBuilderTreeNode & node) { dummy = true; } - auto [op, val] = extractBinaryOp(node, col.value()); + auto [op, val] = extractBinaryOp(node, col); if (op == "equals") { if (val < - threshold || val > threshold) diff --git a/src/Storages/Statistic/Estimator.h b/src/Storages/Statistic/Estimator.h index 53ea46cbfd5..3190e0698fe 100644 --- a/src/Storages/Statistic/Estimator.h +++ b/src/Storages/Statistic/Estimator.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace DB { @@ -89,7 +89,7 @@ private: }; std::map column_estimators; - std::optional extractSingleColumn(const RPNBuilderTreeNode & node) const; + /// std::optional extractSingleColumn(const RPNBuilderTreeNode & node) const; std::pair extractBinaryOp(const RPNBuilderTreeNode & node, const std::string & column_name) const; public: diff --git a/src/Storages/Statistic/Statistic.cpp b/src/Storages/Statistic/Statistic.cpp index d3edcf73422..38b05c316bc 100644 --- a/src/Storages/Statistic/Statistic.cpp +++ b/src/Storages/Statistic/Statistic.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -17,16 +18,17 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTIC; } -void TDigestStatistic::update(const ColumnPtr & column) +void MergeTreeStatisticFactory::registerCreator(StatisticType stat_type, Creator creator) { - size_t size = column->size(); + if (!creators.emplace(stat_type, std::move(creator)).second) + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticFactory: the statistic creator type {} is not unique", stat_type); +} + +void MergeTreeStatisticFactory::registerValidator(StatisticType stat_type, Validator validator) +{ + if (!validators.emplace(stat_type, std::move(validator)).second) + throw 
Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticFactory: the statistic validator type {} is not unique", stat_type); - for (size_t i = 0; i < size; ++i) - { - /// TODO: support more types. - Float64 value = column->getFloat64(i); - data.add(value, 1); - } } StatisticPtr TDigestCreator(const StatisticDescription & stat) @@ -41,18 +43,6 @@ void TDigestValidator(const StatisticDescription &, DataTypePtr data_type) throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "TDigest does not support type {}", data_type->getName()); } -void MergeTreeStatisticFactory::registerCreator(StatisticType stat_type, Creator creator) -{ - if (!creators.emplace(stat_type, std::move(creator)).second) - throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticFactory: the statistic creator type {} is not unique", stat_type); -} - -void MergeTreeStatisticFactory::registerValidator(StatisticType stat_type, Validator validator) -{ - if (!validators.emplace(stat_type, std::move(validator)).second) - throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticFactory: the statistic validator type {} is not unique", stat_type); - -} MergeTreeStatisticFactory::MergeTreeStatisticFactory() { diff --git a/src/Storages/Statistic/Statistic.h b/src/Storages/Statistic/Statistic.h index 0fa8ff8ff3d..132b453e465 100644 --- a/src/Storages/Statistic/Statistic.h +++ b/src/Storages/Statistic/Statistic.h @@ -3,14 +3,16 @@ #include #include #include + +#include + #include #include +#include #include #include #include -#include -#include /// this is for user-defined statistic. constexpr auto STAT_FILE_PREFIX = "statistic_"; @@ -59,38 +61,6 @@ protected: }; -/// TDigestStatistic is a kind of histogram. -class TDigestStatistic : public IStatistic -{ - QuantileTDigest data; -public: - explicit TDigestStatistic(const StatisticDescription & stat_) : IStatistic(stat_) - { - } - - Float64 estimateLess(Float64 val) const - { - return data.getCountLessThan(val); - } - - void serialize(WriteBuffer & buf) override - { - data.serialize(buf); - } - - void deserialize(ReadBuffer & buf) override - { - data.deserialize(buf); - } - - void update(const ColumnPtr & column) override; - - UInt64 count() override - { - return static_cast(data.count); - } -}; - class ColumnsDescription; class MergeTreeStatisticFactory : private boost::noncopyable diff --git a/src/Storages/Statistic/TDigestStatistic.cpp b/src/Storages/Statistic/TDigestStatistic.cpp new file mode 100644 index 00000000000..cb10902c64c --- /dev/null +++ b/src/Storages/Statistic/TDigestStatistic.cpp @@ -0,0 +1,38 @@ +#include + +namespace DB +{ + +Float64 TDigestStatistic::estimateLess(Float64 val) const +{ + return data.getCountLessThan(val); +} + +void TDigestStatistic::serialize(WriteBuffer & buf) +{ + data.serialize(buf); +} + +void TDigestStatistic::deserialize(ReadBuffer & buf) +{ + data.deserialize(buf); +} + +void TDigestStatistic::update(const ColumnPtr & column) +{ + size_t size = column->size(); + + for (size_t i = 0; i < size; ++i) + { + /// TODO: support more types. + Float64 value = column->getFloat64(i); + data.add(value, 1); + } +} + +UInt64 TDigestStatistic::count() +{ + return static_cast(data.count); +} + +} diff --git a/src/Storages/Statistic/TDigestStatistic.h b/src/Storages/Statistic/TDigestStatistic.h new file mode 100644 index 00000000000..b7e31eef363 --- /dev/null +++ b/src/Storages/Statistic/TDigestStatistic.h @@ -0,0 +1,28 @@ +#pragma once + +#include + +namespace DB +{ + +/// TDigestStatistic is a kind of histogram. 
+class TDigestStatistic : public IStatistic +{ + QuantileTDigest data; +public: + explicit TDigestStatistic(const StatisticDescription & stat_) : IStatistic(stat_) + { + } + + Float64 estimateLess(Float64 val) const; + + void serialize(WriteBuffer & buf) override; + + void deserialize(ReadBuffer & buf) override; + + void update(const ColumnPtr & column) override; + + UInt64 count() override; +}; + +} diff --git a/src/Storages/StatisticsDescription.h b/src/Storages/StatisticsDescription.h index 0918433e553..9a66951ab52 100644 --- a/src/Storages/StatisticsDescription.h +++ b/src/Storages/StatisticsDescription.h @@ -28,6 +28,11 @@ struct StatisticDescription StatisticDescription() = default; + bool operator==(const StatisticDescription & other) const + { + return type == other.type && column_name == other.column_name; + } + static StatisticDescription getStatisticFromColumnDeclaration(const ASTColumnDeclaration & column); static std::vector getStatisticsFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns); From ea6cb1ad0c95f194519c863bb29302e8829669a3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 30 Oct 2023 01:04:50 +0100 Subject: [PATCH 0123/1097] Maybe better --- tests/clickhouse-test | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index e5659e8fca4..36846a4aeb1 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1177,7 +1177,7 @@ class TestCase: if BuildFlags.SANITIZE_COVERAGE in args.build_flags: clickhouse_execute( args, - f"INSERT INTO system.coverage SELECT '{self.case}', coverage()", + f"INSERT INTO system.coverage SELECT now(), '{self.case}', coverage()", retry_error_codes=True, ) @@ -2369,6 +2369,7 @@ def main(args): """ CREATE TABLE IF NOT EXISTS system.coverage ( + time DateTime, test_name String, coverage Array(UInt64) ) ENGINE = MergeTree ORDER BY test_name; @@ -2378,7 +2379,7 @@ def main(args): # Coverage collected at the system startup before running any tests: clickhouse_execute( args, - "INSERT INTO system.coverage SELECT '', coverage()", + "INSERT INTO system.coverage SELECT now(), '', coverage()", ) total_tests_run = 0 From ccf5003442eff0b60cafad3faa489fc2c7ff1aa0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 30 Oct 2023 03:20:04 +0100 Subject: [PATCH 0124/1097] Maybe smaller binary --- src/Functions/geometryConverters.h | 5 +++++ src/Functions/polygonArea.cpp | 4 ++++ src/Functions/polygonConvexHull.cpp | 5 ++++- src/Functions/polygonPerimeter.cpp | 5 +++++ src/Functions/polygonsDistance.cpp | 5 ++++- src/Functions/polygonsEquals.cpp | 7 ++++++- src/Functions/polygonsIntersection.cpp | 5 ++++- src/Functions/polygonsSymDifference.cpp | 4 ++++ src/Functions/polygonsUnion.cpp | 10 ++++------ src/Functions/polygonsWithin.cpp | 3 +++ src/Functions/readWkt.cpp | 4 ++++ src/Functions/svg.cpp | 5 +++++ src/Functions/wkt.cpp | 6 ++++++ 13 files changed, 58 insertions(+), 10 deletions(-) diff --git a/src/Functions/geometryConverters.h b/src/Functions/geometryConverters.h index 97162fa9dd0..dba984b4184 100644 --- a/src/Functions/geometryConverters.h +++ b/src/Functions/geometryConverters.h @@ -28,6 +28,9 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + template using Ring = boost::geometry::model::ring; @@ -371,3 +374,5 @@ static void callOnTwoGeometryDataTypes(DataTypePtr left_type, DataTypePtr right_ } } + +} diff --git a/src/Functions/polygonArea.cpp b/src/Functions/polygonArea.cpp index e49a4eb9fb3..1c4ef9f79a3 
100644 --- a/src/Functions/polygonArea.cpp +++ b/src/Functions/polygonArea.cpp @@ -26,6 +26,9 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + template class FunctionPolygonArea : public IFunction { @@ -99,6 +102,7 @@ const char * FunctionPolygonArea::name = "polygonAreaCartesian"; template <> const char * FunctionPolygonArea::name = "polygonAreaSpherical"; +} REGISTER_FUNCTION(PolygonArea) { diff --git a/src/Functions/polygonConvexHull.cpp b/src/Functions/polygonConvexHull.cpp index d7fca45bd1e..921c0700ca7 100644 --- a/src/Functions/polygonConvexHull.cpp +++ b/src/Functions/polygonConvexHull.cpp @@ -25,6 +25,9 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace +{ + template class FunctionPolygonConvexHull : public IFunction { @@ -94,10 +97,10 @@ public: } }; - template <> const char * FunctionPolygonConvexHull::name = "polygonConvexHullCartesian"; +} REGISTER_FUNCTION(PolygonConvexHull) { diff --git a/src/Functions/polygonPerimeter.cpp b/src/Functions/polygonPerimeter.cpp index 2d89d4e4f5b..85645118f84 100644 --- a/src/Functions/polygonPerimeter.cpp +++ b/src/Functions/polygonPerimeter.cpp @@ -17,13 +17,17 @@ #include #include + namespace DB { + namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ template class FunctionPolygonPerimeter : public IFunction @@ -97,6 +101,7 @@ const char * FunctionPolygonPerimeter::name = "polygonPerimeterC template <> const char * FunctionPolygonPerimeter::name = "polygonPerimeterSpherical"; +} REGISTER_FUNCTION(PolygonPerimeter) { diff --git a/src/Functions/polygonsDistance.cpp b/src/Functions/polygonsDistance.cpp index d2c58105eae..d6c7d799b5e 100644 --- a/src/Functions/polygonsDistance.cpp +++ b/src/Functions/polygonsDistance.cpp @@ -27,6 +27,9 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + template class FunctionPolygonsDistance : public IFunction { @@ -108,6 +111,7 @@ const char * FunctionPolygonsDistance::name = "polygonsDistanceC template <> const char * FunctionPolygonsDistance::name = "polygonsDistanceSpherical"; +} REGISTER_FUNCTION(PolygonsDistance) { @@ -115,5 +119,4 @@ REGISTER_FUNCTION(PolygonsDistance) factory.registerFunction>(); } - } diff --git a/src/Functions/polygonsEquals.cpp b/src/Functions/polygonsEquals.cpp index 3c80ae1e4c5..bdc4f18042c 100644 --- a/src/Functions/polygonsEquals.cpp +++ b/src/Functions/polygonsEquals.cpp @@ -19,13 +19,18 @@ #include #include + namespace DB { + namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + template class FunctionPolygonsEquals : public IFunction { @@ -103,10 +108,10 @@ public: } }; - template <> const char * FunctionPolygonsEquals::name = "polygonsEqualsCartesian"; +} REGISTER_FUNCTION(PolygonsEquals) { diff --git a/src/Functions/polygonsIntersection.cpp b/src/Functions/polygonsIntersection.cpp index 84e5fe0d4b7..5777f438a19 100644 --- a/src/Functions/polygonsIntersection.cpp +++ b/src/Functions/polygonsIntersection.cpp @@ -26,6 +26,9 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + template class FunctionPolygonsIntersection : public IFunction { @@ -107,13 +110,13 @@ public: } }; - template <> const char * FunctionPolygonsIntersection::name = "polygonsIntersectionCartesian"; template <> const char * FunctionPolygonsIntersection::name = "polygonsIntersectionSpherical"; +} REGISTER_FUNCTION(PolygonsIntersection) { diff --git a/src/Functions/polygonsSymDifference.cpp b/src/Functions/polygonsSymDifference.cpp 
index ceb39547427..785a8f76ba6 100644 --- a/src/Functions/polygonsSymDifference.cpp +++ b/src/Functions/polygonsSymDifference.cpp @@ -25,6 +25,8 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ template class FunctionPolygonsSymDifference : public IFunction @@ -109,6 +111,8 @@ const char * FunctionPolygonsSymDifference::name = "polygonsSymD template <> const char * FunctionPolygonsSymDifference::name = "polygonsSymDifferenceSpherical"; +} + REGISTER_FUNCTION(PolygonsSymDifference) { factory.registerFunction>(); diff --git a/src/Functions/polygonsUnion.cpp b/src/Functions/polygonsUnion.cpp index 4a604d0f810..a31d223ea8c 100644 --- a/src/Functions/polygonsUnion.cpp +++ b/src/Functions/polygonsUnion.cpp @@ -3,19 +3,14 @@ #include #include -#include #include -#include #include -#include -#include #include -#include #include -#include + namespace DB { @@ -25,6 +20,8 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ template class FunctionPolygonsUnion : public IFunction @@ -112,6 +109,7 @@ const char * FunctionPolygonsUnion::name = "polygonsUnionCartesi template <> const char * FunctionPolygonsUnion::name = "polygonsUnionSpherical"; +} REGISTER_FUNCTION(PolygonsUnion) { diff --git a/src/Functions/polygonsWithin.cpp b/src/Functions/polygonsWithin.cpp index 1b094f42060..bf4db1cf9f8 100644 --- a/src/Functions/polygonsWithin.cpp +++ b/src/Functions/polygonsWithin.cpp @@ -27,6 +27,8 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ template class FunctionPolygonsWithin : public IFunction @@ -111,6 +113,7 @@ const char * FunctionPolygonsWithin::name = "polygonsWithinCarte template <> const char * FunctionPolygonsWithin::name = "polygonsWithinSpherical"; +} REGISTER_FUNCTION(PolygonsWithin) { diff --git a/src/Functions/readWkt.cpp b/src/Functions/readWkt.cpp index ec20cdf3723..8dff297bcb1 100644 --- a/src/Functions/readWkt.cpp +++ b/src/Functions/readWkt.cpp @@ -16,6 +16,8 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ template class FunctionReadWKT : public IFunction @@ -95,6 +97,8 @@ struct ReadWKTMultiPolygonNameHolder static constexpr const char * name = "readWKTMultiPolygon"; }; +} + REGISTER_FUNCTION(ReadWKT) { factory.registerFunction, ReadWKTPointNameHolder>>(); diff --git a/src/Functions/svg.cpp b/src/Functions/svg.cpp index f8f85216b3f..550fe29cfc4 100644 --- a/src/Functions/svg.cpp +++ b/src/Functions/svg.cpp @@ -16,6 +16,9 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ + class FunctionSvg : public IFunction { public: @@ -94,6 +97,8 @@ public: } }; +} + REGISTER_FUNCTION(Svg) { factory.registerFunction(); diff --git a/src/Functions/wkt.cpp b/src/Functions/wkt.cpp index fc9ef75a1e2..afcfabd0bf4 100644 --- a/src/Functions/wkt.cpp +++ b/src/Functions/wkt.cpp @@ -9,6 +9,9 @@ namespace DB { +namespace +{ + class FunctionWkt : public IFunction { public: @@ -52,6 +55,7 @@ public: for (size_t i = 0; i < input_rows_count; ++i) { std::stringstream str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + str.exceptions(std::ios::failbit); str << boost::geometry::wkt(figures[i]); std::string serialized = str.str(); res_column->insertData(serialized.c_str(), serialized.size()); @@ -68,6 +72,8 @@ public: } }; +} + REGISTER_FUNCTION(Wkt) { factory.registerFunction(); From aaca32f6a7f5c84ce9be36f2d7864c3f80a56b4f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 30 Oct 2023 03:22:49 +0100 Subject: [PATCH 0125/1097] Add function 
coverageAll --- base/base/coverage.cpp | 46 +++++++++++++++++++++++++++++++++----- base/base/coverage.h | 12 +++++++++- cmake/sanitize.cmake | 4 ++-- src/Functions/coverage.cpp | 27 ++++++++++++---------- 4 files changed, 68 insertions(+), 21 deletions(-) diff --git a/base/base/coverage.cpp b/base/base/coverage.cpp index 60eb6fcac72..4af6a279af9 100644 --- a/base/base/coverage.cpp +++ b/base/base/coverage.cpp @@ -46,13 +46,17 @@ void dumpCoverageReportIfPossible() namespace { - bool initialized = false; + bool pc_guards_initialized = false; + bool pc_table_initialized = false; uint32_t * guards_start = nullptr; uint32_t * guards_end = nullptr; - size_t coverage_array_size = 0; uintptr_t * coverage_array = nullptr; + size_t coverage_array_size = 0; + + uintptr_t * all_addresses_array = nullptr; + size_t all_addresses_array_size = 0; } extern "C" @@ -62,9 +66,9 @@ extern "C" /// But we will use it only for the main DSO. void __sanitizer_cov_trace_pc_guard_init(uint32_t * start, uint32_t * stop) { - if (initialized) + if (pc_guards_initialized) return; - initialized = true; + pc_guards_initialized = true; /// The function can be called multiple times, but we need to initialize only once. if (start == stop || *start) @@ -80,6 +84,23 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t * start, uint32_t * stop) resetCoverage(); } +/// This is called at least once for every DSO for initialization +/// and provides information about all instrumented addresses. +void __sanitizer_cov_pcs_init(const uintptr_t * pcs_begin, const uintptr_t * pcs_end) +{ + if (pc_table_initialized) + return; + pc_table_initialized = true; + + all_addresses_array = static_cast(malloc(sizeof(uintptr_t) * coverage_array_size)); + all_addresses_array_size = pcs_end - pcs_begin; + + /// They are not a real pointers, but also contain a flag in the most significant bit, + /// in which we are not interested for now. Reset it. + for (size_t i = 0; i < all_addresses_array_size; ++i) + all_addresses_array[i] = pcs_begin[i] & 0x7FFFFFFFFFFFFFFFULL; +} + /// This is called at every basic block / edge, etc. void __sanitizer_cov_trace_pc_guard(uint32_t * guard) { @@ -101,12 +122,20 @@ void __sanitizer_cov_trace_pc_guard(uint32_t * guard) } -__attribute__((no_sanitize("coverage"))) std::span getCoverage() +__attribute__((no_sanitize("coverage"))) +std::span getCoverage() { return {coverage_array, coverage_array_size}; } -__attribute__((no_sanitize("coverage"))) void resetCoverage() +__attribute__((no_sanitize("coverage"))) +std::span getAllInstrumentedAddresses() +{ + return {all_addresses_array, all_addresses_array_size}; +} + +__attribute__((no_sanitize("coverage"))) +void resetCoverage() { memset(coverage_array, 0, coverage_array_size * sizeof(*coverage_array)); @@ -124,6 +153,11 @@ std::span getCoverage() return {}; } +std::span getAllInstrumentedAddresses() +{ + return {}; +} + void resetCoverage() { } diff --git a/base/base/coverage.h b/base/base/coverage.h index b6664bec223..f75ed2d3553 100644 --- a/base/base/coverage.h +++ b/base/base/coverage.h @@ -11,5 +11,15 @@ /// Idempotent, may be called multiple times. void dumpCoverageReportIfPossible(); -std::span getCoverage(); +/// This is effective if SANITIZE_COVERAGE is enabled at build time. +/// Get accumulated unique program addresses of the instrumented parts of the code, +/// seen so far after program startup or after previous reset. +/// The returned span will be represented as a sparse map, containing mostly zeros, which you should filter away. 
+std::span getCoverage(); + +/// Get all instrumented addresses that could be in the coverage. +std::span getAllInstrumentedAddresses(); + +/// Reset the accumulated coverage. +/// This is useful to compare coverage of different tests, including differential coverage. void resetCoverage(); diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 7d25a85ef62..3f7a8498059 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -78,7 +78,7 @@ if (SANITIZE_COVERAGE) set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DSANITIZE_COVERAGE=1") # But the actual coverage will be enabled on per-library basis: for ClickHouse code, but not for 3rd-party. - set (COVERAGE_FLAGS "-fsanitize-coverage=trace-pc-guard") + set (COVERAGE_FLAGS "-fsanitize-coverage=trace-pc-guard,pc-table") endif() -set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard") +set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table") diff --git a/src/Functions/coverage.cpp b/src/Functions/coverage.cpp index 1825e6aa826..8a62469fa54 100644 --- a/src/Functions/coverage.cpp +++ b/src/Functions/coverage.cpp @@ -18,28 +18,30 @@ namespace DB namespace { +enum class Kind +{ + Current, + All +}; + /** If ClickHouse is build with coverage instrumentation, returns an array - * of currently accumulated unique code addresses. + * of currently accumulated (`coverage`) / all possible (`coverageAll`) unique code addresses. */ class FunctionCoverage : public IFunction { -public: - static constexpr auto name = "coverage"; +private: + Kind kind; +public: String getName() const override { - return name; + return kind == Kind::Current ? "coverage" : "coverageAll"; } - explicit FunctionCoverage() + explicit FunctionCoverage(Kind kind_) : kind(kind_) { } - static FunctionPtr create(ContextPtr) - { - return std::make_shared(); - } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; @@ -62,7 +64,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override { - auto coverage_table = getCoverage(); + auto coverage_table = kind == Kind::Current ? 
getCoverage() : getAllInstrumentedAddresses(); auto column_addresses = ColumnUInt64::create(); auto & data = column_addresses->getData(); @@ -83,7 +85,8 @@ public: REGISTER_FUNCTION(Coverage) { - factory.registerFunction(); + factory.registerFunction("coverage", [](ContextPtr){ return std::make_unique(std::make_shared(Kind::Current)); }); + factory.registerFunction("coverageAll", [](ContextPtr){ return std::make_unique(std::make_shared(Kind::All)); }); } } From ad4bde6b8bcafcb9c87454cbeeb0448533279e07 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 30 Oct 2023 03:54:11 +0100 Subject: [PATCH 0126/1097] Fix build --- base/base/coverage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/base/coverage.cpp b/base/base/coverage.cpp index 4af6a279af9..d70c3bcd82b 100644 --- a/base/base/coverage.cpp +++ b/base/base/coverage.cpp @@ -148,7 +148,7 @@ void resetCoverage() #else -std::span getCoverage() +std::span getCoverage() { return {}; } From f8e209ebd26f278ed582adf0aab8f786be8bb591 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 30 Oct 2023 13:45:18 +0300 Subject: [PATCH 0127/1097] WindowTransform decrease amount of virtual function calls --- src/Processors/Transforms/WindowTransform.cpp | 19 ++++++++++++------- src/Processors/Transforms/WindowTransform.h | 3 +++ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 9565a073f48..df6246510bd 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -257,6 +257,7 @@ WindowTransform::WindowTransform(const Block & input_header_, window_description.frame = *custom_default_frame; } + workspace.is_aggregate_function_state = workspace.aggregate_function->isState(); workspace.aggregate_function_state.reset( aggregate_function->sizeOfData(), aggregate_function->alignOfData()); @@ -957,10 +958,7 @@ void WindowTransform::updateAggregationState() auto * columns = ws.argument_columns.data(); // Removing arena.get() from the loop makes it faster somehow... auto * arena_ptr = arena.get(); - for (auto row = first_row; row < past_the_end_row; ++row) - { - a->add(buf, columns, row, arena_ptr); - } + a->addBatchSinglePlaceFromInterval(first_row, past_the_end_row, buf, columns, arena_ptr); } } } @@ -987,9 +985,16 @@ void WindowTransform::writeOutCurrentRow() // FIXME does it also allocate the result on the arena? // We'll have to pass it out with blocks then... 
- /// We should use insertMergeResultInto to insert result into ColumnAggregateFunction - /// correctly if result contains AggregateFunction's states - a->insertMergeResultInto(buf, *result_column, arena.get()); + if (ws.is_aggregate_function_state) + { + /// We should use insertMergeResultInto to insert result into ColumnAggregateFunction + /// correctly if result contains AggregateFunction's states + a->insertMergeResultInto(buf, *result_column, arena.get()); + } + else + { + a->insertResultInto(buf, *result_column, arena.get()); + } } } } diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h index de3e82d15ee..347c2516230 100644 --- a/src/Processors/Transforms/WindowTransform.h +++ b/src/Processors/Transforms/WindowTransform.h @@ -26,6 +26,9 @@ struct WindowFunctionWorkspace { AggregateFunctionPtr aggregate_function; + // Cached value of aggregate function isState virtual method + bool is_aggregate_function_state = false; + // This field is set for pure window functions. When set, we ignore the // window_function.aggregate_function, and work through this interface // instead. From 136f9841540eee70917815728765f51fe916fb11 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 19 Sep 2023 15:49:25 +0000 Subject: [PATCH 0128/1097] Update arrow to release-13.0.0 --- contrib/arrow | 2 +- contrib/arrow-cmake/CMakeLists.txt | 51 ++++++++---------- .../02735_parquet_encoder.reference | 8 +-- .../02884_parquet_new_encodings.reference | 1 + .../02884_parquet_new_encodings.sh | 9 ++++ .../delta_lenght_byte_array_encoding.parquet | Bin 0 -> 2795 bytes 6 files changed, 37 insertions(+), 34 deletions(-) create mode 100644 tests/queries/0_stateless/02884_parquet_new_encodings.reference create mode 100755 tests/queries/0_stateless/02884_parquet_new_encodings.sh create mode 100644 tests/queries/0_stateless/data_parquet/delta_lenght_byte_array_encoding.parquet diff --git a/contrib/arrow b/contrib/arrow index 1d93838f69a..9d9c464ce68 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 1d93838f69a802639ca144ea5704a98e2481810d +Subproject commit 9d9c464ce6883f52aaca9f913eec4cd50006c767 diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index 02e809c560f..c45d75bb3f2 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -230,6 +230,8 @@ set(ARROW_SRCS "${LIBRARY_DIR}/array/builder_nested.cc" "${LIBRARY_DIR}/array/builder_primitive.cc" "${LIBRARY_DIR}/array/builder_union.cc" + "${LIBRARY_DIR}/array/builder_run_end.cc" + "${LIBRARY_DIR}/array/array_run_end.cc" "${LIBRARY_DIR}/array/concatenate.cc" "${LIBRARY_DIR}/array/data.cc" "${LIBRARY_DIR}/array/diff.cc" @@ -309,9 +311,12 @@ set(ARROW_SRCS "${LIBRARY_DIR}/util/debug.cc" "${LIBRARY_DIR}/util/tracing.cc" "${LIBRARY_DIR}/util/atfork_internal.cc" + "${LIBRARY_DIR}/util/crc32.cc" + "${LIBRARY_DIR}/util/hashing.cc" + "${LIBRARY_DIR}/util/ree_util.cc" + "${LIBRARY_DIR}/util/union_util.cc" "${LIBRARY_DIR}/vendored/base64.cpp" "${LIBRARY_DIR}/vendored/datetime/tz.cpp" - "${LIBRARY_DIR}/vendored/musl/strptime.c" "${LIBRARY_DIR}/vendored/uriparser/UriCommon.c" "${LIBRARY_DIR}/vendored/uriparser/UriCompare.c" @@ -328,39 +333,20 @@ set(ARROW_SRCS "${LIBRARY_DIR}/vendored/uriparser/UriRecompose.c" "${LIBRARY_DIR}/vendored/uriparser/UriResolve.c" "${LIBRARY_DIR}/vendored/uriparser/UriShorten.c" + "${LIBRARY_DIR}/vendored/double-conversion/bignum.cc" + "${LIBRARY_DIR}/vendored/double-conversion/bignum-dtoa.cc" + 
"${LIBRARY_DIR}/vendored/double-conversion/cached-powers.cc" + "${LIBRARY_DIR}/vendored/double-conversion/double-to-string.cc" + "${LIBRARY_DIR}/vendored/double-conversion/fast-dtoa.cc" + "${LIBRARY_DIR}/vendored/double-conversion/fixed-dtoa.cc" + "${LIBRARY_DIR}/vendored/double-conversion/string-to-double.cc" + "${LIBRARY_DIR}/vendored/double-conversion/strtod.cc" "${LIBRARY_DIR}/compute/api_aggregate.cc" "${LIBRARY_DIR}/compute/api_scalar.cc" "${LIBRARY_DIR}/compute/api_vector.cc" "${LIBRARY_DIR}/compute/cast.cc" "${LIBRARY_DIR}/compute/exec.cc" - "${LIBRARY_DIR}/compute/exec/accumulation_queue.cc" - "${LIBRARY_DIR}/compute/exec/accumulation_queue.h" - "${LIBRARY_DIR}/compute/exec/aggregate.cc" - "${LIBRARY_DIR}/compute/exec/aggregate_node.cc" - "${LIBRARY_DIR}/compute/exec/asof_join_node.cc" - "${LIBRARY_DIR}/compute/exec/bloom_filter.cc" - "${LIBRARY_DIR}/compute/exec/exec_plan.cc" - "${LIBRARY_DIR}/compute/exec/expression.cc" - "${LIBRARY_DIR}/compute/exec/filter_node.cc" - "${LIBRARY_DIR}/compute/exec/hash_join.cc" - "${LIBRARY_DIR}/compute/exec/hash_join_dict.cc" - "${LIBRARY_DIR}/compute/exec/hash_join_node.cc" - "${LIBRARY_DIR}/compute/exec/key_hash.cc" - "${LIBRARY_DIR}/compute/exec/key_map.cc" - "${LIBRARY_DIR}/compute/exec/map_node.cc" - "${LIBRARY_DIR}/compute/exec/options.cc" - "${LIBRARY_DIR}/compute/exec/order_by_impl.cc" - "${LIBRARY_DIR}/compute/exec/partition_util.cc" - "${LIBRARY_DIR}/compute/exec/project_node.cc" - "${LIBRARY_DIR}/compute/exec/query_context.cc" - "${LIBRARY_DIR}/compute/exec/sink_node.cc" - "${LIBRARY_DIR}/compute/exec/source_node.cc" - "${LIBRARY_DIR}/compute/exec/swiss_join.cc" - "${LIBRARY_DIR}/compute/exec/task_util.cc" - "${LIBRARY_DIR}/compute/exec/tpch_node.cc" - "${LIBRARY_DIR}/compute/exec/union_node.cc" - "${LIBRARY_DIR}/compute/exec/util.cc" "${LIBRARY_DIR}/compute/function.cc" "${LIBRARY_DIR}/compute/function_internal.cc" "${LIBRARY_DIR}/compute/kernel.cc" @@ -403,8 +389,13 @@ set(ARROW_SRCS "${LIBRARY_DIR}/compute/kernels/vector_select_k.cc" "${LIBRARY_DIR}/compute/kernels/vector_selection.cc" "${LIBRARY_DIR}/compute/kernels/vector_sort.cc" + "${LIBRARY_DIR}/compute/kernels/vector_selection_internal.cc" + "${LIBRARY_DIR}/compute/kernels/vector_selection_filter_internal.cc" + "${LIBRARY_DIR}/compute/kernels/vector_selection_take_internal.cc" "${LIBRARY_DIR}/compute/light_array.cc" "${LIBRARY_DIR}/compute/registry.cc" + "${LIBRARY_DIR}/compute/expression.cc" + "${LIBRARY_DIR}/compute/ordering.cc" "${LIBRARY_DIR}/compute/row/compare_internal.cc" "${LIBRARY_DIR}/compute/row/encode_internal.cc" "${LIBRARY_DIR}/compute/row/grouper.cc" @@ -488,10 +479,10 @@ set(PARQUET_SRCS "${LIBRARY_DIR}/exception.cc" "${LIBRARY_DIR}/file_reader.cc" "${LIBRARY_DIR}/file_writer.cc" + "${LIBRARY_DIR}/page_index.cc" "${LIBRARY_DIR}/level_conversion.cc" "${LIBRARY_DIR}/level_comparison.cc" "${LIBRARY_DIR}/metadata.cc" - "${LIBRARY_DIR}/murmur3.cc" "${LIBRARY_DIR}/platform.cc" "${LIBRARY_DIR}/printer.cc" "${LIBRARY_DIR}/properties.cc" @@ -500,6 +491,8 @@ set(PARQUET_SRCS "${LIBRARY_DIR}/stream_reader.cc" "${LIBRARY_DIR}/stream_writer.cc" "${LIBRARY_DIR}/types.cc" + "${LIBRARY_DIR}/bloom_filter_reader.cc" + "${LIBRARY_DIR}/xxhasher.cc" "${GEN_LIBRARY_DIR}/parquet_constants.cpp" "${GEN_LIBRARY_DIR}/parquet_types.cpp" diff --git a/tests/queries/0_stateless/02735_parquet_encoder.reference b/tests/queries/0_stateless/02735_parquet_encoder.reference index a7ee82bc67f..143fde3093f 100644 --- a/tests/queries/0_stateless/02735_parquet_encoder.reference +++ 
b/tests/queries/0_stateless/02735_parquet_encoder.reference @@ -36,11 +36,11 @@ ipv6 Nullable(FixedString(16)) 1 1000000 1 3914219105369203805 4 1000000 1 -(1000000,0,NULL,'100','299') -(1000000,0,NULL,'0','-1294970296') -(1000000,0,NULL,'-2147483296','2147481000') +(1000000,NULL,NULL,'100','299') +(1000000,NULL,NULL,'0','-1294970296') +(1000000,NULL,NULL,'-2147483296','2147481000') (100000,900000,NULL,'100009','999999') -[(2,0,NULL,'','[]')] +[(2,NULL,NULL,'','[]')] 1 1 0 1 5090915589685802007 diff --git a/tests/queries/0_stateless/02884_parquet_new_encodings.reference b/tests/queries/0_stateless/02884_parquet_new_encodings.reference new file mode 100644 index 00000000000..1034f208e18 --- /dev/null +++ b/tests/queries/0_stateless/02884_parquet_new_encodings.reference @@ -0,0 +1 @@ +SWEEP SWETT 00459 \N ('20221206100111','+0100') ('20221206100111','+0100') ('20221206100111','+0100') 3 11 T \N diff --git a/tests/queries/0_stateless/02884_parquet_new_encodings.sh b/tests/queries/0_stateless/02884_parquet_new_encodings.sh new file mode 100755 index 00000000000..01114c2e4f4 --- /dev/null +++ b/tests/queries/0_stateless/02884_parquet_new_encodings.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_parquet/delta_lenght_byte_array_encoding.parquet')" + diff --git a/tests/queries/0_stateless/data_parquet/delta_lenght_byte_array_encoding.parquet b/tests/queries/0_stateless/data_parquet/delta_lenght_byte_array_encoding.parquet new file mode 100644 index 0000000000000000000000000000000000000000..cf785d97dc7032fdec8e51c8d3749c685ffc7a4d GIT binary patch literal 2795 zcmdUxO=ufO6vt;*@>-EMvg3G$9V!wiu?Q_qB+(RlDJeMM7EwfElH8V+GIAFrt+HAL zdNYR7QcOwO9C|V-h2o~A1w${zgnW>Da6<@%LUL*_t`8w8K9tZmv%BM2$Lr=0DAme% znD^dq{`)?5Re$$n1sm9c>*>W`zcy=F#|ZvRoW*DyAzho(vReMsnG+}Kk>4K+Pu7fd zznPiH@8PBUH}WZdbr1`N=nhm?bkW3a;}a{!4M_IHJ~&=#2P3MwB2V4Kp*L0a`*ah}wf8 z?19LjQ445@`{^N|kgP1Vwr!|LQBy#MwcR>vTgZRL$i?-cmRu_Ob8%t794{5K25Hn} z3X_>Ck{qr>&Jit=vvReh$>pYory~1`jvLW(F(Vx|GIIA$R_e~(X_A9#V=a+^-zdRs)Z$w$lKWFAe-k;5*%Zi}`kQ5ZG|+OG3kwVot-lRqXd5 z*Ktkk3(W6BZQ|q}s{OG4_i$esAC&73|OvO@a~8NuAV7x%Z0jKr*q85P4R>6XdTPt|BF1 ztA%9DStaI+QHWXElZ@H1!DRT7ro?|ciV68`TYPH&3C0weDs8Ha70y`5;{%HB!>JAW zTw@AogEUGjx*}|qL$BuX=@)`-Z)kb5iy4Lah%@I#63IH8-tp;5H&p5~PF)^JDov-R zO1+a+q+T8a=%;2<>TWZXF80ch!oAA5znDp}yKz(H&KnB$I>%nz8_CVmNo3I7M^aR% zMrx(ikORNXUTGkSsmoBIOIVA27+wl;1OVw94rtut%I{}@Zq3wr0I2z_StL) f-VCmHwCDKnE(qS3_2@dhJ6`aM77T>y@H6)xIfFfV literal 0 HcmV?d00001 From bf9567aac4506102232f9cd1200e8ebae11d2c8a Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 25 Sep 2023 18:26:29 +0000 Subject: [PATCH 0129/1097] Fix typo in filename --- .../0_stateless/02884_parquet_new_encodings.sh | 2 +- ...uet => delta_length_byte_array_encoding.parquet} | Bin 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/queries/0_stateless/data_parquet/{delta_lenght_byte_array_encoding.parquet => delta_length_byte_array_encoding.parquet} (100%) diff --git a/tests/queries/0_stateless/02884_parquet_new_encodings.sh b/tests/queries/0_stateless/02884_parquet_new_encodings.sh index 01114c2e4f4..496ed126e23 100755 --- a/tests/queries/0_stateless/02884_parquet_new_encodings.sh +++ b/tests/queries/0_stateless/02884_parquet_new_encodings.sh @@ -5,5 +5,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_parquet/delta_lenght_byte_array_encoding.parquet')" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_parquet/delta_length_byte_array_encoding.parquet')" diff --git a/tests/queries/0_stateless/data_parquet/delta_lenght_byte_array_encoding.parquet b/tests/queries/0_stateless/data_parquet/delta_length_byte_array_encoding.parquet similarity index 100% rename from tests/queries/0_stateless/data_parquet/delta_lenght_byte_array_encoding.parquet rename to tests/queries/0_stateless/data_parquet/delta_length_byte_array_encoding.parquet From ad67b6c2ea8d9733d94f71b9ba9adcd2dfdf7f15 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 30 Oct 2023 19:33:06 +0800 Subject: [PATCH 0130/1097] allow tuple field pruning --- .../Impl/NativeORCBlockInputFormat.cpp | 150 +++++++++++++++++- 1 file changed, 143 insertions(+), 7 deletions(-) diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index b346ef3d232..0af4428b5f0 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -679,6 +679,47 @@ buildORCSearchArgument(const KeyCondition & key_condition, const Block & header, } +static std::string toDotColumnPath(const std::vector & columns) +{ + if (columns.empty()) + return {}; + + std::ostringstream column_stream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + std::copy(columns.begin(), columns.end(), std::ostream_iterator(column_stream, ".")); + std::string column_path = column_stream.str(); + return column_path.substr(0, column_path.length() - 1); +} + +static void buildORCTypeNameIdMap( + const orc::Type * orc_type, + std::vector & columns, + bool case_insensitive_column_matching, + std::map & id_type_map, + std::map & name_id_map) +{ + id_type_map[orc_type->getColumnId()] = orc_type; + if (orc::STRUCT == orc_type->getKind()) + { + for (size_t i = 0; i < orc_type->getSubtypeCount(); ++i) + { + const std::string & field_name = orc_type->getFieldName(i); + columns.push_back(field_name); + auto column_path = toDotColumnPath(columns); + if (case_insensitive_column_matching) + boost::to_lower(column_path); + name_id_map[column_path] = orc_type->getSubtype(i)->getColumnId(); + buildORCTypeNameIdMap(orc_type->getSubtype(i), columns, case_insensitive_column_matching, id_type_map, name_id_map); + columns.pop_back(); + } + } + else + { + // other non-primitive type + for (size_t j = 0; j < orc_type->getSubtypeCount(); ++j) + buildORCTypeNameIdMap(orc_type->getSubtype(j), columns, case_insensitive_column_matching, id_type_map, name_id_map); + } +} + static void getFileReaderAndSchema( ReadBuffer & in, std::unique_ptr & file_reader, @@ -706,6 +747,76 @@ static void getFileReaderAndSchema( } } +static void updateIncludeIndices( + DataTypePtr type, const orc::Type * orc_type, bool case_insensitive_column_matching, std::unordered_set & column_indices) +{ + /// Primitive types + if (orc_type->getSubtypeCount() == 0) + { + column_indices.insert(orc_type->getColumnId()); + return; + } + + auto non_nullable_type = removeNullable(type); + switch (orc_type->getKind()) + { + case orc::LIST: { + const auto * array_type = typeid_cast(non_nullable_type.get()); + if (array_type) + { + updateIncludeIndices( + array_type->getNestedType(), orc_type->getSubtype(0), case_insensitive_column_matching, column_indices); + } + return; + } + case orc::MAP: { + const auto * map_type = 
typeid_cast(non_nullable_type.get()); + if (map_type) + { + updateIncludeIndices(map_type->getKeyType(), orc_type->getSubtype(0), case_insensitive_column_matching, column_indices); + updateIncludeIndices(map_type->getValueType(), orc_type->getSubtype(1), case_insensitive_column_matching, column_indices); + } + return; + } + case orc::STRUCT: { + const auto * tuple_type = typeid_cast(non_nullable_type.get()); + if (tuple_type) + { + if (tuple_type->haveExplicitNames()) + { + const auto & names = tuple_type->getElementNames(); + for (size_t tuple_i = 0; tuple_i < names.size(); ++tuple_i) + { + const auto & name = names[tuple_i]; + for (size_t struct_i = 0; struct_i < orc_type->getSubtypeCount(); ++struct_i) + { + if (boost::equals(orc_type->getFieldName(struct_i), name) + || (case_insensitive_column_matching && boost::iequals(orc_type->getFieldName(struct_i), name))) + { + updateIncludeIndices( + tuple_type->getElement(tuple_i), + orc_type->getSubtype(struct_i), + case_insensitive_column_matching, + column_indices); + break; + } + } + } + } + else + { + for (size_t i = 0; i < tuple_type->getElements().size() && i < orc_type->getSubtypeCount(); ++i) + updateIncludeIndices( + tuple_type->getElement(i), orc_type->getSubtype(i), case_insensitive_column_matching, column_indices); + } + } + return; + } + default: + return; + } +} + NativeORCBlockInputFormat::NativeORCBlockInputFormat(ReadBuffer & in_, Block header_, const FormatSettings & format_settings_) : IInputFormat(std::move(header_), &in_), format_settings(format_settings_), skip_stripes(format_settings.orc.skip_stripes) { @@ -727,15 +838,39 @@ void NativeORCBlockInputFormat::prepareFileReader() format_settings.null_as_default, format_settings.orc.case_insensitive_column_matching); - const bool ignore_case = format_settings.orc.case_insensitive_column_matching; - std::unordered_set nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case); - for (size_t i = 0; i < schema.columns(); ++i) + const bool ignore_case = format_settings.orc.case_insensitive_column_matching; + std::vector columns; + std::map id_type_map; + std::map name_id_map; + buildORCTypeNameIdMap(&file_reader->getType(), columns, ignore_case, id_type_map, name_id_map); + + // std::cout << "subtypes:" << file_reader->getType().getSubtypeCount() << std::endl; + // std::cout << "id type map" << std::endl; + // for (const auto & [k, v]: id_type_map) + // std::cout << "id:" << k << ", type:" << v->toString() << std::endl; + // std::cout << "name id map" << std::endl; + // for (const auto & [k, v]: name_id_map) + // std::cout << "name:" << k << ", id:" << v << std::endl; + + const auto & header = getPort().getHeader(); + std::unordered_set column_indices; + for (const auto & column : header) { - const auto & name = schema.getByPosition(i).name; - if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? 
boost::to_lower_copy(name) : name)) - include_indices.push_back(static_cast(i)); + auto name = column.name; + if (ignore_case) + boost::to_lower(name); + + if (name_id_map.contains(name)) + { + auto id = name_id_map[name]; + if (id_type_map.contains(id)) + { + updateIncludeIndices(column.type, id_type_map[id], ignore_case, column_indices); + } + } } + include_indices.assign(column_indices.begin(), column_indices.end()); if (format_settings.orc.filter_push_down && key_condition && !sarg) { @@ -816,6 +951,7 @@ Chunk NativeORCBlockInputFormat::generate() Chunk res; size_t num_rows = batch->numElements; const auto & schema = stripe_reader->getSelectedType(); + // std::cout << "output schema:" << schema.toString() << std::endl; orc_column_to_ch_column->orcTableToCHChunk(res, &schema, batch.get(), num_rows, &block_missing_values); approx_bytes_read_for_chunk = num_rows * current_stripe_info->getLength() / current_stripe_info->getNumberOfRows(); @@ -1376,8 +1512,8 @@ static ColumnWithTypeAndName readColumnFromORCColumn( Columns tuple_elements; DataTypes tuple_types; std::vector tuple_names; - const auto * tuple_type_hint = type_hint ? typeid_cast(type_hint.get()) : nullptr; + const auto * tuple_type_hint = type_hint ? typeid_cast(type_hint.get()) : nullptr; const auto * orc_struct_column = dynamic_cast(orc_column); for (size_t i = 0; i < orc_type->getSubtypeCount(); ++i) { From 8954b806b49b00f3c2c5a53b04ca80c31eb2e68c Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 30 Oct 2023 20:08:09 +0800 Subject: [PATCH 0131/1097] add uts --- .../02906_orc_tuple_field_prune.reference | 108 ++++++++++++++++++ .../02906_orc_tuple_field_prune.sql | 38 ++++++ 2 files changed, 146 insertions(+) create mode 100644 tests/queries/0_stateless/02906_orc_tuple_field_prune.reference create mode 100644 tests/queries/0_stateless/02906_orc_tuple_field_prune.sql diff --git a/tests/queries/0_stateless/02906_orc_tuple_field_prune.reference b/tests/queries/0_stateless/02906_orc_tuple_field_prune.reference new file mode 100644 index 00000000000..dfdd38f5e8e --- /dev/null +++ b/tests/queries/0_stateless/02906_orc_tuple_field_prune.reference @@ -0,0 +1,108 @@ +int64_column Nullable(Int64) +string_column Nullable(String) +float64_column Nullable(Float64) +tuple_column Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)) +array_tuple_column Array(Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64))) +map_tuple_column Map(String, Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64))) +-- { echoOn } +-- Test primitive types +select int64_column, string_column, float64_column from file('02906.orc') where int64_column % 15 = 0; +0 0 0 +15 15 15 +30 30 30 +45 45 45 +60 60 60 +75 75 75 +90 90 90 +-- Test tuple type with names +select tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, tuple_column Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64))') where int64_column % 15 = 0; +(NULL,NULL,NULL) +('15',15,15) +(NULL,NULL,NULL) +('45',45,45) +(NULL,NULL,NULL) +('75',75,75) +(NULL,NULL,NULL) +select tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, tuple_column Tuple(c Nullable(Int64))') where int64_column % 15 = 0; +(NULL) +(15) +(NULL) +(45) +(NULL) +(75) +(NULL) +select tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, tuple_column Tuple(c Nullable(Int64), d Nullable(String))') where int64_column % 15 = 0; +(NULL,NULL) +(15,NULL) +(NULL,NULL) +(45,NULL) +(NULL,NULL) +(75,NULL) +(NULL,NULL) +-- Test tuple type without names 
+select tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, tuple_column Tuple(Nullable(String), Nullable(Float64), Nullable(Int64))') where int64_column % 15 = 0; +(NULL,NULL,NULL) +('15',15,15) +(NULL,NULL,NULL) +('45',45,45) +(NULL,NULL,NULL) +('75',75,75) +(NULL,NULL,NULL) +select tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, tuple_column Tuple(Nullable(String), Nullable(Float64))') where int64_column % 15 = 0; +(NULL,NULL) +('15',15) +(NULL,NULL) +('45',45) +(NULL,NULL) +('75',75) +(NULL,NULL) +-- Test tuple nested in array +select array_tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, array_tuple_column Array(Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)))') where int64_column % 15 = 0; +[(NULL,NULL,NULL)] +[('15',15,15)] +[(NULL,NULL,NULL)] +[('45',45,45)] +[(NULL,NULL,NULL)] +[('75',75,75)] +[(NULL,NULL,NULL)] +select array_tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, array_tuple_column Array(Tuple(b Nullable(Float64), c Nullable(Int64)))') where int64_column % 15 = 0; +[(NULL,NULL)] +[(15,15)] +[(NULL,NULL)] +[(45,45)] +[(NULL,NULL)] +[(75,75)] +[(NULL,NULL)] +select array_tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, array_tuple_column Array(Tuple(b Nullable(Float64), c Nullable(Int64), d Nullable(String)))') where int64_column % 15 = 0; +[(NULL,NULL,NULL)] +[(15,15,NULL)] +[(NULL,NULL,NULL)] +[(45,45,NULL)] +[(NULL,NULL,NULL)] +[(75,75,NULL)] +[(NULL,NULL,NULL)] +-- Test tuple nested in map +select map_tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, map_tuple_column Map(String, Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)))') where int64_column % 15 = 0; +{'0':(NULL,NULL,NULL)} +{'15':('15',15,15)} +{'30':(NULL,NULL,NULL)} +{'45':('45',45,45)} +{'60':(NULL,NULL,NULL)} +{'75':('75',75,75)} +{'90':(NULL,NULL,NULL)} +select map_tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, map_tuple_column Map(String, Tuple(b Nullable(Float64), c Nullable(Int64)))') where int64_column % 15 = 0; +{'0':(NULL,NULL)} +{'15':(15,15)} +{'30':(NULL,NULL)} +{'45':(45,45)} +{'60':(NULL,NULL)} +{'75':(75,75)} +{'90':(NULL,NULL)} +select map_tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, map_tuple_column Map(String, Tuple(b Nullable(Float64), c Nullable(Int64), d Nullable(String)))') where int64_column % 15 = 0; +{'0':(NULL,NULL,NULL)} +{'15':(15,15,NULL)} +{'30':(NULL,NULL,NULL)} +{'45':(45,45,NULL)} +{'60':(NULL,NULL,NULL)} +{'75':(75,75,NULL)} +{'90':(NULL,NULL,NULL)} diff --git a/tests/queries/0_stateless/02906_orc_tuple_field_prune.sql b/tests/queries/0_stateless/02906_orc_tuple_field_prune.sql new file mode 100644 index 00000000000..a7f2c31d3e1 --- /dev/null +++ b/tests/queries/0_stateless/02906_orc_tuple_field_prune.sql @@ -0,0 +1,38 @@ +set engine_file_truncate_on_insert = 1; +set flatten_nested = 0; + +insert into function file('02906.orc') +select + number::Int64 as int64_column, + number::String as string_column, + number::Float64 as float64_column, + cast(if(number % 10 = 0, tuple(null, null, null), tuple(number::String, number::Float64, number::Int64)) as Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64))) as tuple_column, + cast(if(number % 10 = 0, array(tuple(null, null, null)), array(tuple(number::String, number::Float64, number::Int64))) as Array(Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)))) as array_tuple_column, + cast(if(number % 10 = 0, map(number::String, tuple(null, null, null)), 
map(number::String, tuple(number::String, number::Float64, number::Int64))) as Map(String, Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)))) as map_tuple_column + from numbers(100); + +desc file('02906.orc'); + +-- { echoOn } +-- Test primitive types +select int64_column, string_column, float64_column from file('02906.orc') where int64_column % 15 = 0; + +-- Test tuple type with names +select tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, tuple_column Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64))') where int64_column % 15 = 0; +select tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, tuple_column Tuple(c Nullable(Int64))') where int64_column % 15 = 0; +select tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, tuple_column Tuple(c Nullable(Int64), d Nullable(String))') where int64_column % 15 = 0; + +-- Test tuple type without names +select tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, tuple_column Tuple(Nullable(String), Nullable(Float64), Nullable(Int64))') where int64_column % 15 = 0; +select tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, tuple_column Tuple(Nullable(String), Nullable(Float64))') where int64_column % 15 = 0; + +-- Test tuple nested in array +select array_tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, array_tuple_column Array(Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)))') where int64_column % 15 = 0; +select array_tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, array_tuple_column Array(Tuple(b Nullable(Float64), c Nullable(Int64)))') where int64_column % 15 = 0; +select array_tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, array_tuple_column Array(Tuple(b Nullable(Float64), c Nullable(Int64), d Nullable(String)))') where int64_column % 15 = 0; + +-- Test tuple nested in map +select map_tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, map_tuple_column Map(String, Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)))') where int64_column % 15 = 0; +select map_tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, map_tuple_column Map(String, Tuple(b Nullable(Float64), c Nullable(Int64)))') where int64_column % 15 = 0; +select map_tuple_column from file('02906.orc', 'ORC', 'int64_column Int64, map_tuple_column Map(String, Tuple(b Nullable(Float64), c Nullable(Int64), d Nullable(String)))') where int64_column % 15 = 0; +-- { echoOff } From 38f24c04558e4528ca1d9cf92ba170c400177569 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 30 Oct 2023 20:29:43 +0800 Subject: [PATCH 0132/1097] add performance tests --- tests/performance/orc_tuple_field_prune.xml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tests/performance/orc_tuple_field_prune.xml diff --git a/tests/performance/orc_tuple_field_prune.xml b/tests/performance/orc_tuple_field_prune.xml new file mode 100644 index 00000000000..2bcd15c8635 --- /dev/null +++ b/tests/performance/orc_tuple_field_prune.xml @@ -0,0 +1,17 @@ + + + 1 + 10000 + 0 + + + + insert into function file('test_orc_tfp.orc') select * from generateRandom('int64_column Nullable(Int64), tuple_column Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)), array_tuple_column Array(Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64))), map_tuple_column Map(String, Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)))') limit 1000000 + + + DROP TABLE IF 
EXISTS test_orc_tfp + + select * from file('test_orc_tfp.orc', 'ORC', 'int64_column Int64, tuple_column Tuple(c Nullable(Int64))') format Null + select * from file('test_orc_tfp.orc', 'ORC', 'int64_column Int64, array_tuple_column Array(Tuple(c Nullable(Int64)))') format Null + select * from file('test_orc_tfp.orc', 'ORC', 'int64_column Int64, map_tuple_column Map(String, Tuple(c Nullable(Int64)))') format Null + \ No newline at end of file From aef9ce0cf06f4541ca624e8b6711bf4575a20f40 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 30 Oct 2023 20:30:02 +0800 Subject: [PATCH 0133/1097] update orc version --- contrib/orc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/orc b/contrib/orc index f31c271110a..2c31e314e4e 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit f31c271110a2f0dac908a152f11708193ae209ee +Subproject commit 2c31e314e4e36dcb1c58ca1cd7454fc4685af997 From 423df126254980d8a05e15659a8b45a479a8a5be Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 30 Oct 2023 20:38:56 +0800 Subject: [PATCH 0134/1097] update orc version --- contrib/orc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/orc b/contrib/orc index 2c31e314e4e..5f8db0fb0a4 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit 2c31e314e4e36dcb1c58ca1cd7454fc4685af997 +Subproject commit 5f8db0fb0a47fbc4902bf9d7f712e65309f13d2d From 1e5703a77b50a1fb4f54afcaf0c322cdf989c3d7 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 30 Oct 2023 20:41:45 +0800 Subject: [PATCH 0135/1097] upgrade orc version --- contrib/orc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/orc b/contrib/orc index 5f8db0fb0a4..5046972fbab 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit 5f8db0fb0a47fbc4902bf9d7f712e65309f13d2d +Subproject commit 5046972fbabfe3cdf77a8768228793c7c0a61085 From f53fdbeeadf7a2be2fa962e0ee0be91628348b93 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 30 Oct 2023 19:11:03 +0000 Subject: [PATCH 0136/1097] Fix vuild for s390 --- contrib/arrow | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/arrow b/contrib/arrow index 9d9c464ce68..8cdbf43f78a 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 9d9c464ce6883f52aaca9f913eec4cd50006c767 +Subproject commit 8cdbf43f78ad02615aef29dc7f9af0dea22a03e4 From 1237cef26dc3eaa2e967a6a48041e0703882e648 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Tue, 31 Oct 2023 00:53:46 +0100 Subject: [PATCH 0137/1097] address comments --- src/Interpreters/InterpreterExplainQuery.cpp | 4 +++- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- .../QueryPlan/Optimizations/optimizePrewhere.cpp | 2 +- src/Storages/IStorage.cpp | 2 +- src/Storages/IStorage.h | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 7 +++---- src/Storages/MergeTree/MergeTreeData.h | 2 +- 7 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 39cc4df5c2d..5f1536a7220 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -66,8 +66,10 @@ namespace static void visit(ASTSelectQuery & select, ASTPtr & node, Data & data) { + /// we need to read statistic when `allow_statistic_optimize` is enabled. 
+ bool only_analyze = !data.getContext()->getSettings().allow_statistic_optimize; InterpreterSelectQuery interpreter( - node, data.getContext(), SelectQueryOptions(QueryProcessingStage::FetchColumns).analyze().modify()); + node, data.getContext(), SelectQueryOptions(QueryProcessingStage::FetchColumns).analyze(only_analyze).modify()); const SelectQueryInfo & query_info = interpreter.getQueryInfo(); if (query_info.view_query) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 91dbb7b3514..cb52283d0e3 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -658,7 +658,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( MergeTreeWhereOptimizer where_optimizer{ std::move(column_compressed_sizes), metadata_snapshot, - storage->getConditionEstimatorByPredicate(query_info, context), + storage->getConditionEstimatorByPredicate(query_info, storage_snapshot, context), queried_columns, supported_prewhere_columns, log}; diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index 544b23f884e..5c5171d4296 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -161,7 +161,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes) MergeTreeWhereOptimizer where_optimizer{ std::move(column_compressed_sizes), storage_metadata, - storage.getConditionEstimatorByPredicate(read_from_merge_tree->getQueryInfo(), context), + storage.getConditionEstimatorByPredicate(read_from_merge_tree->getQueryInfo(), storage_snapshot, context), queried_columns, storage.supportedPrewhereColumns(), &Poco::Logger::get("QueryPlanOptimizePrewhere")}; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 45a1b59a36e..8b4bae94b55 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -226,7 +226,7 @@ StorageID IStorage::getStorageID() const return storage_id; } -ConditionEstimator IStorage::getConditionEstimatorByPredicate(const SelectQueryInfo &, ContextPtr) const +ConditionEstimator IStorage::getConditionEstimatorByPredicate(const SelectQueryInfo &, const StorageSnapshotPtr &, ContextPtr) const { return {}; } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 80d6c7d5bba..a5a858f57f1 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -137,7 +137,7 @@ public: /// Returns true if the storage supports queries with the PREWHERE section. virtual bool supportsPrewhere() const { return false; } - virtual ConditionEstimator getConditionEstimatorByPredicate(const SelectQueryInfo &, ContextPtr) const; + virtual ConditionEstimator getConditionEstimatorByPredicate(const SelectQueryInfo &, const StorageSnapshotPtr &, ContextPtr) const; /// Returns which columns supports PREWHERE, or empty std::nullopt if all columns is supported. /// This is needed for engines whose aggregates data from multiple tables, like Merge. 
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 42ef7238409..92da5e5d75d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -444,14 +444,13 @@ StoragePolicyPtr MergeTreeData::getStoragePolicy() const return storage_policy; } -ConditionEstimator MergeTreeData::getConditionEstimatorByPredicate(const SelectQueryInfo & query_info, ContextPtr local_context) const +ConditionEstimator MergeTreeData::getConditionEstimatorByPredicate(const SelectQueryInfo & query_info, const StorageSnapshotPtr & storage_snapshot, ContextPtr local_context) const { if (!local_context->getSettings().allow_statistic_optimize) return {}; - auto parts = getDataPartsVectorForInternalUsage(); + const auto & parts = assert_cast(*storage_snapshot->data).parts; - auto metadata_snapshot = getInMemoryMetadataPtr(); if (parts.empty()) { return {}; @@ -460,7 +459,7 @@ ConditionEstimator MergeTreeData::getConditionEstimatorByPredicate(const SelectQ ASTPtr expression_ast; ConditionEstimator result; - PartitionPruner partition_pruner(metadata_snapshot, query_info, local_context, true /* strict */); + PartitionPruner partition_pruner(storage_snapshot->metadata, query_info, local_context, true /* strict */); if (partition_pruner.isUseless()) { diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 417e94cc4c4..47b134ba55b 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -432,7 +432,7 @@ public: bool supportsPrewhere() const override { return true; } - ConditionEstimator getConditionEstimatorByPredicate(const SelectQueryInfo &, ContextPtr) const override; + ConditionEstimator getConditionEstimatorByPredicate(const SelectQueryInfo &, const StorageSnapshotPtr &, ContextPtr) const override; bool supportsFinal() const override; From e5db57204d42b578296a1d2f022f38641d702be9 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 31 Oct 2023 11:57:47 +0800 Subject: [PATCH 0138/1097] fix bugs --- contrib/orc | 2 +- .../Impl/NativeORCBlockInputFormat.cpp | 26 +++++++++---------- .../02906_orc_tuple_field_prune.sql | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/contrib/orc b/contrib/orc index 5046972fbab..e24f2c2a3ca 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit 5046972fbabfe3cdf77a8768228793c7c0a61085 +Subproject commit e24f2c2a3ca0769c96704ab20ad6f512a83ea2ad diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 0af4428b5f0..88b3fbeee2b 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -52,19 +52,19 @@ ORCInputStream::ORCInputStream(SeekableReadBuffer & in_, size_t file_size_) : in { } -uint64_t ORCInputStream::getLength() const +UInt64 ORCInputStream::getLength() const { return file_size; } -uint64_t ORCInputStream::getNaturalReadSize() const +UInt64 ORCInputStream::getNaturalReadSize() const { return 128 * 1024; } -void ORCInputStream::read(void * buf, uint64_t length, uint64_t offset) +void ORCInputStream::read(void * buf, UInt64 length, UInt64 offset) { - if (offset != static_cast(in.getPosition())) + if (offset != static_cast(in.getPosition())) in.seek(offset, SEEK_SET); in.readStrict(reinterpret_cast(buf), length); @@ -102,7 +102,7 @@ std::unique_ptr asORCInputStreamLoadIntoMemory(ReadBuffer & in static 
const orc::Type * getORCTypeByName(const orc::Type & schema, const String & name, bool case_insensitive_column_matching) { - for (uint64_t i = 0; i != schema.getSubtypeCount(); ++i) + for (UInt64 i = 0; i != schema.getSubtypeCount(); ++i) if (boost::equals(schema.getFieldName(i), name) || (case_insensitive_column_matching && boost::iequals(schema.getFieldName(i), name))) return schema.getSubtype(i); @@ -694,8 +694,8 @@ static void buildORCTypeNameIdMap( const orc::Type * orc_type, std::vector & columns, bool case_insensitive_column_matching, - std::map & id_type_map, - std::map & name_id_map) + std::map & id_type_map, + std::map & name_id_map) { id_type_map[orc_type->getColumnId()] = orc_type; if (orc::STRUCT == orc_type->getKind()) @@ -841,8 +841,8 @@ void NativeORCBlockInputFormat::prepareFileReader() const bool ignore_case = format_settings.orc.case_insensitive_column_matching; std::vector columns; - std::map id_type_map; - std::map name_id_map; + std::map id_type_map; + std::map name_id_map; buildORCTypeNameIdMap(&file_reader->getType(), columns, ignore_case, id_type_map, name_id_map); // std::cout << "subtypes:" << file_reader->getType().getSubtypeCount() << std::endl; @@ -854,7 +854,7 @@ void NativeORCBlockInputFormat::prepareFileReader() // std::cout << "name:" << k << ", id:" << v << std::endl; const auto & header = getPort().getHeader(); - std::unordered_set column_indices; + std::unordered_set include_typeids; for (const auto & column : header) { auto name = column.name; @@ -866,11 +866,11 @@ void NativeORCBlockInputFormat::prepareFileReader() auto id = name_id_map[name]; if (id_type_map.contains(id)) { - updateIncludeIndices(column.type, id_type_map[id], ignore_case, column_indices); + updateIncludeIndices(column.type, id_type_map[id], ignore_case, include_typeids); } } } - include_indices.assign(column_indices.begin(), column_indices.end()); + include_indices.assign(include_typeids.begin(), include_typeids.end()); if (format_settings.orc.filter_push_down && key_condition && !sarg) { @@ -895,7 +895,7 @@ bool NativeORCBlockInputFormat::prepareStripeReader() throw Exception(ErrorCodes::INCORRECT_DATA, "ORC stripe {} has no rows", current_stripe); orc::RowReaderOptions row_reader_options; - row_reader_options.include(include_indices); + row_reader_options.includeTypes(include_indices); row_reader_options.range(current_stripe_info->getOffset(), current_stripe_info->getLength()); if (format_settings.orc.filter_push_down && sarg) { diff --git a/tests/queries/0_stateless/02906_orc_tuple_field_prune.sql b/tests/queries/0_stateless/02906_orc_tuple_field_prune.sql index a7f2c31d3e1..834caa1da53 100644 --- a/tests/queries/0_stateless/02906_orc_tuple_field_prune.sql +++ b/tests/queries/0_stateless/02906_orc_tuple_field_prune.sql @@ -1,7 +1,7 @@ set engine_file_truncate_on_insert = 1; set flatten_nested = 0; -insert into function file('02906.orc') +insert into function file('02906.orc', 'ORC') select number::Int64 as int64_column, number::String as string_column, From c97b2c5be74a73305a2c0dbc905dc59ac77c0fd3 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 31 Oct 2023 12:00:45 +0800 Subject: [PATCH 0139/1097] fix code style --- .../Impl/NativeORCBlockInputFormat.cpp | 24 +++++++++---------- tests/performance/orc_tuple_field_prune.xml | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 88b3fbeee2b..fd0f4ee0ca0 100644 --- 
a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -747,13 +747,13 @@ static void getFileReaderAndSchema( } } -static void updateIncludeIndices( - DataTypePtr type, const orc::Type * orc_type, bool case_insensitive_column_matching, std::unordered_set & column_indices) +static void updateIncludeTypeIds( + DataTypePtr type, const orc::Type * orc_type, bool case_insensitive_column_matching, std::unordered_set & include_typeids) { /// Primitive types if (orc_type->getSubtypeCount() == 0) { - column_indices.insert(orc_type->getColumnId()); + include_typeids.insert(orc_type->getColumnId()); return; } @@ -764,8 +764,8 @@ static void updateIncludeIndices( const auto * array_type = typeid_cast(non_nullable_type.get()); if (array_type) { - updateIncludeIndices( - array_type->getNestedType(), orc_type->getSubtype(0), case_insensitive_column_matching, column_indices); + updateIncludeTypeIds( + array_type->getNestedType(), orc_type->getSubtype(0), case_insensitive_column_matching, include_typeids); } return; } @@ -773,8 +773,8 @@ static void updateIncludeIndices( const auto * map_type = typeid_cast(non_nullable_type.get()); if (map_type) { - updateIncludeIndices(map_type->getKeyType(), orc_type->getSubtype(0), case_insensitive_column_matching, column_indices); - updateIncludeIndices(map_type->getValueType(), orc_type->getSubtype(1), case_insensitive_column_matching, column_indices); + updateIncludeTypeIds(map_type->getKeyType(), orc_type->getSubtype(0), case_insensitive_column_matching, include_typeids); + updateIncludeTypeIds(map_type->getValueType(), orc_type->getSubtype(1), case_insensitive_column_matching, include_typeids); } return; } @@ -793,11 +793,11 @@ static void updateIncludeIndices( if (boost::equals(orc_type->getFieldName(struct_i), name) || (case_insensitive_column_matching && boost::iequals(orc_type->getFieldName(struct_i), name))) { - updateIncludeIndices( + updateIncludeTypeIds( tuple_type->getElement(tuple_i), orc_type->getSubtype(struct_i), case_insensitive_column_matching, - column_indices); + include_typeids); break; } } @@ -806,8 +806,8 @@ static void updateIncludeIndices( else { for (size_t i = 0; i < tuple_type->getElements().size() && i < orc_type->getSubtypeCount(); ++i) - updateIncludeIndices( - tuple_type->getElement(i), orc_type->getSubtype(i), case_insensitive_column_matching, column_indices); + updateIncludeTypeIds( + tuple_type->getElement(i), orc_type->getSubtype(i), case_insensitive_column_matching, include_typeids); } } return; @@ -866,7 +866,7 @@ void NativeORCBlockInputFormat::prepareFileReader() auto id = name_id_map[name]; if (id_type_map.contains(id)) { - updateIncludeIndices(column.type, id_type_map[id], ignore_case, include_typeids); + updateIncludeTypeIds(column.type, id_type_map[id], ignore_case, include_typeids); } } } diff --git a/tests/performance/orc_tuple_field_prune.xml b/tests/performance/orc_tuple_field_prune.xml index 2bcd15c8635..4e338733329 100644 --- a/tests/performance/orc_tuple_field_prune.xml +++ b/tests/performance/orc_tuple_field_prune.xml @@ -6,7 +6,7 @@ - insert into function file('test_orc_tfp.orc') select * from generateRandom('int64_column Nullable(Int64), tuple_column Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)), array_tuple_column Array(Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64))), map_tuple_column Map(String, Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)))') limit 1000000 + insert into function 
file('test_orc_tfp.orc', 'ORC') select * from generateRandom('int64_column Nullable(Int64), tuple_column Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)), array_tuple_column Array(Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64))), map_tuple_column Map(String, Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)))') limit 1000000 DROP TABLE IF EXISTS test_orc_tfp From 5e21d2459a00b38601bb2c5709795a9bda72fa65 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 31 Oct 2023 14:19:51 +0800 Subject: [PATCH 0140/1097] fix failed fast test --- tests/queries/0_stateless/02906_orc_tuple_field_prune.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02906_orc_tuple_field_prune.sql b/tests/queries/0_stateless/02906_orc_tuple_field_prune.sql index 834caa1da53..5428abc40de 100644 --- a/tests/queries/0_stateless/02906_orc_tuple_field_prune.sql +++ b/tests/queries/0_stateless/02906_orc_tuple_field_prune.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest, no-parallel + set engine_file_truncate_on_insert = 1; set flatten_nested = 0; From ce36a6475dcff34f9d4f0510c6608bdf261478e8 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 31 Oct 2023 10:12:31 +0100 Subject: [PATCH 0141/1097] Fixed cland tidy build --- src/Interpreters/executeQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 27fbce5311c..9655e8b2855 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -732,7 +732,7 @@ static std::tuple executeQueryImpl( is_create_parameterized_view = create_query->isParameterizedView(); else if (const auto * explain_query = ast->as()) { - assert(explain_query->children.size() >= 1); + assert(!explain_query->children.empty()); if (const auto * create_of_explain_query = explain_query->children[0]->as()) is_create_parameterized_view = create_of_explain_query->isParameterizedView(); } From 7c5a7fc03aa5a6f2675c9769976413d03f2b9f01 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 31 Oct 2023 17:17:21 +0800 Subject: [PATCH 0142/1097] update orc version --- contrib/orc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/orc b/contrib/orc index e24f2c2a3ca..f31c271110a 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit e24f2c2a3ca0769c96704ab20ad6f512a83ea2ad +Subproject commit f31c271110a2f0dac908a152f11708193ae209ee From 11b00e92fdf34621a26152f607ffa55f2eb09a7a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 31 Oct 2023 12:27:21 +0100 Subject: [PATCH 0143/1097] Add more details to "Data after merge is not byte-identical to data on another replicas" It should be a very rare error, but in case of error you need as much details as there are: - print mismatched info (hash, size) - print all files with size and checksum (this may help for instance when new file is added, or when extesion changed -- compression marks) P.S. maybe detach_not_byte_identical_parts should be enabled by default? Thoughts? 
Signed-off-by: Azat Khuzhin --- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 + .../MergeTree/MergeFromLogEntryTask.cpp | 13 ++++++- .../MergeTree/MergeTreeDataPartChecksum.cpp | 38 +++++++++++++++---- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 30c9b19fcbc..5201eefccf1 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -364,6 +364,7 @@ public: void setBytesOnDisk(UInt64 bytes_on_disk_) { bytes_on_disk = bytes_on_disk_; } size_t getFileSizeOrZero(const String & file_name) const; + auto getFilesChecksums() const { return checksums.files; } /// Moves a part to detached/ directory and adds prefix to its name void renameToDetached(const String & prefix); diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 883cfee89c8..5d64950ab58 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -357,6 +357,13 @@ bool MergeFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrite ProfileEvents::increment(ProfileEvents::DataAfterMergeDiffersFromReplica); + Strings files_with_size; + for (const auto & file : part->getFilesChecksums()) + { + files_with_size.push_back(fmt::format("{}: {} ({})", + file.first, file.second.file_size, getHexUIntLowercase(file.second.file_hash))); + } + LOG_ERROR(log, "{}. Data after merge is not byte-identical to data on another replicas. There could be several reasons:" " 1. Using newer version of compression library after server update." @@ -368,8 +375,10 @@ bool MergeFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrite " 7. Manual modification of source data after server startup." " 8. Manual modification of checksums stored in ZooKeeper." " 9. Part format related settings like 'enable_mixed_granularity_parts' are different on different replicas." - " We will download merged part from replica to force byte-identical result.", - getCurrentExceptionMessage(false)); + " We will download merged part from replica to force byte-identical result." 
+ " List of files in local parts:\n{}", + getCurrentExceptionMessage(false), + fmt::join(files_with_size, "\n")); write_part_log(ExecutionStatus::fromCurrentException("", true)); diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index ed2202fcb19..4bda5ce469d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -34,15 +34,27 @@ void MergeTreeDataPartChecksum::checkEqual(const MergeTreeDataPartChecksum & rhs if (!rhs.is_compressed) throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "No uncompressed checksum for file {}", name); if (rhs.uncompressed_size != uncompressed_size) - throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected uncompressed size of file {} in data part", name); + { + throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected uncompressed size of file {} in data part ({} vs {})", + name, uncompressed_size, rhs.uncompressed_size); + } if (rhs.uncompressed_hash != uncompressed_hash) - throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for uncompressed file {} in data part", name); + { + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for uncompressed file {} in data part ({} vs {})", + name, getHexUIntLowercase(uncompressed_hash), getHexUIntLowercase(rhs.uncompressed_hash)); + } return; } if (rhs.file_size != file_size) - throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected size of file {} in data part", name); + { + throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected size of file {} in data part ({} vs {})", + name, file_size, rhs.file_size); + } if (rhs.file_hash != file_hash) - throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for file {} in data part", name); + { + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for file {} in data part ({} vs {})", + name, getHexUIntLowercase(file_hash), getHexUIntLowercase(rhs.file_hash)); + } } void MergeTreeDataPartChecksum::checkSize(const IDataPartStorage & storage, const String & name) const @@ -446,17 +458,29 @@ void MinimalisticDataPartChecksums::checkEqualImpl(const MinimalisticDataPartChe Strings errors; if (hash_of_uncompressed_files != rhs.hash_of_uncompressed_files) - errors.emplace_back("hash of uncompressed files doesn't match"); + { + errors.emplace_back(fmt::format("hash of uncompressed files doesn't match ({} vs {})", + getHexUIntLowercase(hash_of_uncompressed_files), + getHexUIntLowercase(rhs.hash_of_uncompressed_files))); + } if (check_uncompressed_hash_in_compressed_files) { if (uncompressed_hash_of_compressed_files != rhs.uncompressed_hash_of_compressed_files) - errors.emplace_back("uncompressed hash of compressed files doesn't match"); + { + errors.emplace_back(fmt::format("uncompressed hash of compressed files doesn't match ({} vs {})", + getHexUIntLowercase(uncompressed_hash_of_compressed_files), + getHexUIntLowercase(rhs.uncompressed_hash_of_compressed_files))); + } } else { if (hash_of_all_files != rhs.hash_of_all_files) - errors.emplace_back("total hash of all files doesn't match"); + { + errors.emplace_back(fmt::format("total hash of all files doesn't match ({} vs {})", + getHexUIntLowercase(hash_of_all_files), + getHexUIntLowercase(rhs.hash_of_all_files))); + } } if (!errors.empty()) From 8a1ab02b96e234a9cdc1018b692381e8e4abf9c2 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova 
<54203879+kssenii@users.noreply.github.com> Date: Tue, 31 Oct 2023 12:59:16 +0100 Subject: [PATCH 0144/1097] Update s3_cache.xml --- tests/config/users.d/s3_cache.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/config/users.d/s3_cache.xml b/tests/config/users.d/s3_cache.xml index 4740f37a90c..69b24ecbbc4 100644 --- a/tests/config/users.d/s3_cache.xml +++ b/tests/config/users.d/s3_cache.xml @@ -3,7 +3,6 @@ 1 1 - 10 From 52a3d37ebe6008fc4301d369f6309587b32e648b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 27 Oct 2023 16:59:14 +0200 Subject: [PATCH 0145/1097] Try reducing number of different images --- .../integration/test_backward_compatibility/test.py | 2 +- .../test_aggregate_fixed_key.py | 2 +- .../test_convert_ordinary.py | 2 +- .../test_cte_distributed.py | 2 +- .../test_insert_profile_events.py | 2 +- .../test_memory_bound_aggregation.py | 4 ++-- .../test_normalized_count_comparison.py | 2 +- .../test_select_aggregate_alias_column.py | 2 +- .../test_vertical_merges_from_compact_parts.py | 2 +- .../test_default_compression_codec/test.py | 2 +- tests/integration/test_disk_over_web_server/test.py | 2 +- .../test_distributed_backward_compatability/test.py | 2 +- .../test.py | 2 +- .../test_distributed_inter_server_secret/test.py | 2 +- .../test_groupBitmapAnd_on_distributed/test.py | 2 +- tests/integration/test_old_versions/test.py | 2 +- tests/integration/test_polymorphic_parts/test.py | 2 +- .../test_replicated_merge_tree_compatibility/test.py | 4 ++-- tests/integration/test_replicating_constants/test.py | 2 +- tests/integration/test_ttl_replicated/test.py | 6 +++--- tests/integration/test_version_update/test.py | 12 ++++++------ .../test_version_update_after_mutation/test.py | 6 +++--- 22 files changed, 33 insertions(+), 33 deletions(-) diff --git a/tests/integration/test_backward_compatibility/test.py b/tests/integration/test_backward_compatibility/test.py index 6f21b184a95..847483f2b9b 100644 --- a/tests/integration/test_backward_compatibility/test.py +++ b/tests/integration/test_backward_compatibility/test.py @@ -7,7 +7,7 @@ node1 = cluster.add_instance( "node1", with_zookeeper=True, image="yandex/clickhouse-server", - tag="19.17.8.54", + tag="19.16.9.37", stay_alive=True, with_installed_binary=True, allow_analyzer=False, diff --git a/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py b/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py index cf258987cbf..94bc1d3bfc9 100644 --- a/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py +++ b/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py @@ -7,7 +7,7 @@ node1 = cluster.add_instance( "node1", with_zookeeper=True, image="yandex/clickhouse-server", - tag="21.3", + tag="20.8.11.17", with_installed_binary=True, allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_convert_ordinary.py b/tests/integration/test_backward_compatibility/test_convert_ordinary.py index 36facdd59b1..034a68e0f30 100644 --- a/tests/integration/test_backward_compatibility/test_convert_ordinary.py +++ b/tests/integration/test_backward_compatibility/test_convert_ordinary.py @@ -5,7 +5,7 @@ cluster = ClickHouseCluster(__file__) node = cluster.add_instance( "node", image="yandex/clickhouse-server", - tag="19.17.8.54", + tag="19.16.9.37", stay_alive=True, with_zookeeper=True, with_installed_binary=True, diff --git a/tests/integration/test_backward_compatibility/test_cte_distributed.py 
b/tests/integration/test_backward_compatibility/test_cte_distributed.py index c68468aad75..d47ae3aa255 100644 --- a/tests/integration/test_backward_compatibility/test_cte_distributed.py +++ b/tests/integration/test_backward_compatibility/test_cte_distributed.py @@ -8,7 +8,7 @@ node2 = cluster.add_instance( "node2", with_zookeeper=False, image="yandex/clickhouse-server", - tag="21.7.3.14", + tag="21.6", stay_alive=True, with_installed_binary=True, allow_analyzer=False, diff --git a/tests/integration/test_backward_compatibility/test_insert_profile_events.py b/tests/integration/test_backward_compatibility/test_insert_profile_events.py index 8564c6b5952..d38bece7855 100644 --- a/tests/integration/test_backward_compatibility/test_insert_profile_events.py +++ b/tests/integration/test_backward_compatibility/test_insert_profile_events.py @@ -11,7 +11,7 @@ upstream_node = cluster.add_instance("upstream_node", allow_analyzer=False) old_node = cluster.add_instance( "old_node", image="clickhouse/clickhouse-server", - tag="22.5.1.2079", + tag="22.6", with_installed_binary=True, allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py b/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py index 96b41c81384..5261a279a4f 100644 --- a/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py +++ b/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py @@ -7,7 +7,7 @@ node1 = cluster.add_instance( "node1", with_zookeeper=False, image="yandex/clickhouse-server", - tag="21.1", + tag="20.8.11.17", stay_alive=True, with_installed_binary=True, allow_analyzer=False, @@ -16,7 +16,7 @@ node2 = cluster.add_instance( "node2", with_zookeeper=False, image="yandex/clickhouse-server", - tag="21.1", + tag="20.8.11.17", stay_alive=True, with_installed_binary=True, allow_analyzer=False, diff --git a/tests/integration/test_backward_compatibility/test_normalized_count_comparison.py b/tests/integration/test_backward_compatibility/test_normalized_count_comparison.py index 3cd708d5029..cf7a25e8dc1 100644 --- a/tests/integration/test_backward_compatibility/test_normalized_count_comparison.py +++ b/tests/integration/test_backward_compatibility/test_normalized_count_comparison.py @@ -8,7 +8,7 @@ node2 = cluster.add_instance( "node2", with_zookeeper=False, image="yandex/clickhouse-server", - tag="21.7.2.7", + tag="21.6", stay_alive=True, with_installed_binary=True, allow_analyzer=False, diff --git a/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py b/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py index 7e10b6ab430..ec1d7fedac5 100644 --- a/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py +++ b/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py @@ -8,7 +8,7 @@ node2 = cluster.add_instance( "node2", with_zookeeper=False, image="yandex/clickhouse-server", - tag="21.7.2.7", + tag="21.6", stay_alive=True, with_installed_binary=True, allow_analyzer=False, diff --git a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py index 9c9d1a4d312..e0a9b5ebad6 100644 --- a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py +++ b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py @@ 
-7,7 +7,7 @@ cluster = ClickHouseCluster(__file__) node_old = cluster.add_instance( "node1", image="clickhouse/clickhouse-server", - tag="22.8", + tag="22.6", stay_alive=True, with_installed_binary=True, with_zookeeper=True, diff --git a/tests/integration/test_default_compression_codec/test.py b/tests/integration/test_default_compression_codec/test.py index 82d5eb04d2a..db116ff42f3 100644 --- a/tests/integration/test_default_compression_codec/test.py +++ b/tests/integration/test_default_compression_codec/test.py @@ -29,7 +29,7 @@ node3 = cluster.add_instance( "node3", main_configs=["configs/default_compression.xml", "configs/wide_parts_only.xml"], image="yandex/clickhouse-server", - tag="20.3.16", + tag="19.16.9.37", stay_alive=True, with_installed_binary=True, allow_analyzer=False, diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index 7695d235425..a71fdeff302 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -38,7 +38,7 @@ def cluster(): stay_alive=True, with_installed_binary=True, image="clickhouse/clickhouse-server", - tag="22.8.14.53", + tag="22.6", allow_analyzer=False, ) diff --git a/tests/integration/test_distributed_backward_compatability/test.py b/tests/integration/test_distributed_backward_compatability/test.py index c48a7ad1fa1..319a4c08e60 100644 --- a/tests/integration/test_distributed_backward_compatability/test.py +++ b/tests/integration/test_distributed_backward_compatability/test.py @@ -8,7 +8,7 @@ node_old = cluster.add_instance( "node1", main_configs=["configs/remote_servers.xml"], image="yandex/clickhouse-server", - tag="20.8.9.6", + tag="20.8.11.17", stay_alive=True, with_installed_binary=True, allow_analyzer=False, diff --git a/tests/integration/test_distributed_insert_backward_compatibility/test.py b/tests/integration/test_distributed_insert_backward_compatibility/test.py index 1e566d5e2da..7cfea61ffff 100644 --- a/tests/integration/test_distributed_insert_backward_compatibility/test.py +++ b/tests/integration/test_distributed_insert_backward_compatibility/test.py @@ -11,7 +11,7 @@ node_dist = cluster.add_instance( "node2", main_configs=["configs/remote_servers.xml"], image="yandex/clickhouse-server", - tag="21.11.9.1", + tag="21.6", stay_alive=True, with_installed_binary=True, allow_analyzer=False, diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 1aeaddcf3c5..62beeee80e1 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -31,7 +31,7 @@ backward = make_instance( "configs/remote_servers_backward.xml", image="clickhouse/clickhouse-server", # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 - tag="23.2.3", + tag="22.6", with_installed_binary=True, allow_analyzer=False, ) diff --git a/tests/integration/test_groupBitmapAnd_on_distributed/test.py b/tests/integration/test_groupBitmapAnd_on_distributed/test.py index 8cf7e0fb2c1..5d3dda8ecf2 100644 --- a/tests/integration/test_groupBitmapAnd_on_distributed/test.py +++ b/tests/integration/test_groupBitmapAnd_on_distributed/test.py @@ -26,7 +26,7 @@ node4 = cluster.add_instance( "node4", main_configs=["configs/clusters.xml"], image="yandex/clickhouse-server", - tag="21.5", + tag="21.6", with_zookeeper=True, allow_analyzer=False, ) diff --git 
a/tests/integration/test_old_versions/test.py b/tests/integration/test_old_versions/test.py index aff07c53114..b59bfcc4f6b 100644 --- a/tests/integration/test_old_versions/test.py +++ b/tests/integration/test_old_versions/test.py @@ -55,7 +55,7 @@ node19_13 = cluster.add_instance( node19_16 = cluster.add_instance( "node19_16", image="yandex/clickhouse-server", - tag="19.16.2.2", + tag="19.16.9.37", with_installed_binary=True, main_configs=["configs/config.d/test_cluster.xml"], allow_analyzer=False, diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py index debb509de90..ba9b5ec6cac 100644 --- a/tests/integration/test_polymorphic_parts/test.py +++ b/tests/integration/test_polymorphic_parts/test.py @@ -360,7 +360,7 @@ node7 = cluster.add_instance( user_configs=["configs_old/users.d/not_optimize_count.xml"], with_zookeeper=True, image="yandex/clickhouse-server", - tag="19.17.8.54", + tag="20.8.11.17", stay_alive=True, with_installed_binary=True, allow_analyzer=False, diff --git a/tests/integration/test_replicated_merge_tree_compatibility/test.py b/tests/integration/test_replicated_merge_tree_compatibility/test.py index c30a0d86c98..32a44aa65b9 100644 --- a/tests/integration/test_replicated_merge_tree_compatibility/test.py +++ b/tests/integration/test_replicated_merge_tree_compatibility/test.py @@ -6,7 +6,7 @@ node1 = cluster.add_instance( "node1", with_zookeeper=True, image="yandex/clickhouse-server", - tag="20.12.4.5", + tag="20.8.11.17", stay_alive=True, with_installed_binary=True, allow_analyzer=False, @@ -15,7 +15,7 @@ node2 = cluster.add_instance( "node2", with_zookeeper=True, image="yandex/clickhouse-server", - tag="20.12.4.5", + tag="20.8.11.17", stay_alive=True, with_installed_binary=True, allow_analyzer=False, diff --git a/tests/integration/test_replicating_constants/test.py b/tests/integration/test_replicating_constants/test.py index 00781e473c7..9669e890cd3 100644 --- a/tests/integration/test_replicating_constants/test.py +++ b/tests/integration/test_replicating_constants/test.py @@ -9,7 +9,7 @@ node2 = cluster.add_instance( "node2", with_zookeeper=True, image="yandex/clickhouse-server", - tag="19.1.14", + tag="19.16.9.37", with_installed_binary=True, allow_analyzer=False, ) diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index 117ebe37dd2..29ce2b3dc8d 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -17,7 +17,7 @@ node4 = cluster.add_instance( "node4", with_zookeeper=True, image="yandex/clickhouse-server", - tag="20.12.4.5", + tag="20.8.11.17", stay_alive=True, with_installed_binary=True, main_configs=[ @@ -30,7 +30,7 @@ node5 = cluster.add_instance( "node5", with_zookeeper=True, image="yandex/clickhouse-server", - tag="20.12.4.5", + tag="20.8.11.17", stay_alive=True, with_installed_binary=True, main_configs=[ @@ -42,7 +42,7 @@ node6 = cluster.add_instance( "node6", with_zookeeper=True, image="yandex/clickhouse-server", - tag="20.12.4.5", + tag="20.8.11.17", stay_alive=True, with_installed_binary=True, main_configs=[ diff --git a/tests/integration/test_version_update/test.py b/tests/integration/test_version_update/test.py index b8fa3e7ebb4..a752960bc76 100644 --- a/tests/integration/test_version_update/test.py +++ b/tests/integration/test_version_update/test.py @@ -12,18 +12,18 @@ node2 = cluster.add_instance( "node2", with_zookeeper=True, image="yandex/clickhouse-server", - tag="21.2", + 
tag="20.8.11.17", with_installed_binary=True, stay_alive=True, allow_analyzer=False, ) -# Use differents nodes because if there is node.restart_from_latest_version(), then in later tests +# Use different nodes because if there is node.restart_from_latest_version(), then in later tests # it will be with latest version, but shouldn't, order of tests in CI is shuffled. node3 = cluster.add_instance( "node3", image="yandex/clickhouse-server", - tag="21.5", + tag="21.6", with_installed_binary=True, stay_alive=True, allow_analyzer=False, @@ -31,7 +31,7 @@ node3 = cluster.add_instance( node4 = cluster.add_instance( "node4", image="yandex/clickhouse-server", - tag="21.5", + tag="21.6", with_installed_binary=True, stay_alive=True, allow_analyzer=False, @@ -39,7 +39,7 @@ node4 = cluster.add_instance( node5 = cluster.add_instance( "node5", image="yandex/clickhouse-server", - tag="21.5", + tag="21.6", with_installed_binary=True, stay_alive=True, allow_analyzer=False, @@ -47,7 +47,7 @@ node5 = cluster.add_instance( node6 = cluster.add_instance( "node6", image="yandex/clickhouse-server", - tag="21.5", + tag="21.6", with_installed_binary=True, stay_alive=True, allow_analyzer=False, diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index f3ae190ee46..9fb396b1c14 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -10,7 +10,7 @@ node1 = cluster.add_instance( "node1", with_zookeeper=True, image="yandex/clickhouse-server", - tag="20.4.9.110", + tag="20.8.11.17", with_installed_binary=True, stay_alive=True, main_configs=[ @@ -22,7 +22,7 @@ node2 = cluster.add_instance( "node2", with_zookeeper=True, image="yandex/clickhouse-server", - tag="20.4.9.110", + tag="20.8.11.17", with_installed_binary=True, stay_alive=True, main_configs=[ @@ -34,7 +34,7 @@ node3 = cluster.add_instance( "node3", with_zookeeper=True, image="yandex/clickhouse-server", - tag="20.4.9.110", + tag="20.8.11.17", with_installed_binary=True, stay_alive=True, main_configs=[ From 957671bf744cd173676f5be0f8ca14d0f03118df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 31 Oct 2023 18:06:28 +0100 Subject: [PATCH 0146/1097] Adapt to work with releases without DROP SYNC --- tests/integration/test_ttl_replicated/test.py | 222 ++++++++---------- 1 file changed, 102 insertions(+), 120 deletions(-) diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index 29ce2b3dc8d..119a211ae45 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -66,47 +66,41 @@ def started_cluster(): cluster.shutdown() -def drop_table(nodes, table_name): - for node in nodes: - node.query("DROP TABLE IF EXISTS {} SYNC".format(table_name)) - - # Column TTL works only with wide parts, because it's very expensive to apply it for compact parts def test_ttl_columns(started_cluster): - drop_table([node1, node2], "test_ttl") + table_name = f"test_ttl_{node1.name}_{node2.name}" for node in [node1, node2]: node.query( """ - CREATE TABLE test_ttl(date DateTime, id UInt32, a Int32 TTL date + INTERVAL 1 DAY, b Int32 TTL date + INTERVAL 1 MONTH) + CREATE TABLE {table_name}(date DateTime, id UInt32, a Int32 TTL date + INTERVAL 1 DAY, b Int32 TTL date + INTERVAL 1 MONTH) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_columns', '{replica}') ORDER BY id PARTITION BY 
toDayOfMonth(date) SETTINGS merge_with_ttl_timeout=0, min_bytes_for_wide_part=0, max_merge_selecting_sleep_ms=6000; """.format( - replica=node.name + table_name=table_name, replica=node.name ) ) node1.query( - "INSERT INTO test_ttl VALUES (toDateTime('2000-10-10 00:00:00'), 1, 1, 3)" + f"INSERT INTO {table_name} VALUES (toDateTime('2000-10-10 00:00:00'), 1, 1, 3)" ) node1.query( - "INSERT INTO test_ttl VALUES (toDateTime('2000-10-11 10:00:00'), 2, 2, 4)" + f"INSERT INTO {table_name} VALUES (toDateTime('2000-10-11 10:00:00'), 2, 2, 4)" ) time.sleep(1) # sleep to allow use ttl merge selector for second time - node1.query("OPTIMIZE TABLE test_ttl FINAL") + node1.query(f"OPTIMIZE TABLE {table_name} FINAL") expected = "1\t0\t0\n2\t0\t0\n" - assert TSV(node1.query("SELECT id, a, b FROM test_ttl ORDER BY id")) == TSV( + assert TSV(node1.query(f"SELECT id, a, b FROM {table_name} ORDER BY id")) == TSV( expected ) - assert TSV(node2.query("SELECT id, a, b FROM test_ttl ORDER BY id")) == TSV( + assert TSV(node2.query(f"SELECT id, a, b FROM {table_name} ORDER BY id")) == TSV( expected ) def test_merge_with_ttl_timeout(started_cluster): - table = "test_merge_with_ttl_timeout" - drop_table([node1, node2], table) + table = f"test_merge_with_ttl_timeout_{node1.name}_{node2.name}" for node in [node1, node2]: node.query( """ @@ -157,11 +151,11 @@ def test_merge_with_ttl_timeout(started_cluster): def test_ttl_many_columns(started_cluster): - drop_table([node1, node2], "test_ttl_2") + table = f"test_ttl_2{node1.name}_{node2.name}" for node in [node1, node2]: node.query( """ - CREATE TABLE test_ttl_2(date DateTime, id UInt32, + CREATE TABLE {table}(date DateTime, id UInt32, a Int32 TTL date, _idx Int32 TTL date, _offset Int32 TTL date, @@ -169,44 +163,40 @@ def test_ttl_many_columns(started_cluster): ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_2', '{replica}') ORDER BY id PARTITION BY toDayOfMonth(date) SETTINGS merge_with_ttl_timeout=0, max_merge_selecting_sleep_ms=6000; """.format( - replica=node.name + table=table, replica=node.name ) ) - node1.query("SYSTEM STOP TTL MERGES test_ttl_2") - node2.query("SYSTEM STOP TTL MERGES test_ttl_2") + node1.query(f"SYSTEM STOP TTL MERGES {table}") + node2.query(f"SYSTEM STOP TTL MERGES {table}") node1.query( - "INSERT INTO test_ttl_2 VALUES (toDateTime('2000-10-10 00:00:00'), 1, 2, 3, 4, 5)" + f"INSERT INTO {table} VALUES (toDateTime('2000-10-10 00:00:00'), 1, 2, 3, 4, 5)" ) node1.query( - "INSERT INTO test_ttl_2 VALUES (toDateTime('2100-10-10 10:00:00'), 6, 7, 8, 9, 10)" + f"INSERT INTO {table} VALUES (toDateTime('2100-10-10 10:00:00'), 6, 7, 8, 9, 10)" ) - node2.query("SYSTEM SYNC REPLICA test_ttl_2", timeout=5) + node2.query(f"SYSTEM SYNC REPLICA {table}", timeout=5) # Check that part will appear in result of merge - node1.query("SYSTEM STOP FETCHES test_ttl_2") - node2.query("SYSTEM STOP FETCHES test_ttl_2") + node1.query(f"SYSTEM STOP FETCHES {table}") + node2.query(f"SYSTEM STOP FETCHES {table}") - node1.query("SYSTEM START TTL MERGES test_ttl_2") - node2.query("SYSTEM START TTL MERGES test_ttl_2") + node1.query(f"SYSTEM START TTL MERGES {table}") + node2.query(f"SYSTEM START TTL MERGES {table}") time.sleep(1) # sleep to allow use ttl merge selector for second time - node1.query("OPTIMIZE TABLE test_ttl_2 FINAL", timeout=5) + node1.query(f"OPTIMIZE TABLE {table} FINAL", timeout=5) - node2.query("SYSTEM SYNC REPLICA test_ttl_2", timeout=5) + node2.query(f"SYSTEM SYNC REPLICA {table}", timeout=5) expected = "1\t0\t0\t0\t0\n6\t7\t8\t9\t10\n" assert 
TSV( - node1.query( - "SELECT id, a, _idx, _offset, _partition FROM test_ttl_2 ORDER BY id" - ) + node1.query(f"SELECT id, a, _idx, _offset, _partition FROM {table} ORDER BY id") ) == TSV(expected) assert TSV( - node2.query( - "SELECT id, a, _idx, _offset, _partition FROM test_ttl_2 ORDER BY id" - ) + node2.query(f"SELECT id, a, _idx, _offset, _partition FROM {table} ORDER BY id") ) == TSV(expected) @@ -218,107 +208,107 @@ def test_ttl_many_columns(started_cluster): ], ) def test_ttl_table(started_cluster, delete_suffix): - drop_table([node1, node2], "test_ttl") + table = f"test_ttl_table_{delete_suffix}_{node1.name}_{node2.name}" for node in [node1, node2]: node.query( """ - CREATE TABLE test_ttl(date DateTime, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl', '{replica}') + CREATE TABLE {table}(date DateTime, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/{table}', '{replica}') ORDER BY id PARTITION BY toDayOfMonth(date) TTL date + INTERVAL 1 DAY {delete_suffix} SETTINGS merge_with_ttl_timeout=0, max_merge_selecting_sleep_ms=6000; """.format( - replica=node.name, delete_suffix=delete_suffix + table=table, replica=node.name, delete_suffix=delete_suffix ) ) - node1.query("INSERT INTO test_ttl VALUES (toDateTime('2000-10-10 00:00:00'), 1)") - node1.query("INSERT INTO test_ttl VALUES (toDateTime('2000-10-11 10:00:00'), 2)") + node1.query(f"INSERT INTO {table} VALUES (toDateTime('2000-10-10 00:00:00'), 1)") + node1.query(f"INSERT INTO {table} VALUES (toDateTime('2000-10-11 10:00:00'), 2)") time.sleep(1) # sleep to allow use ttl merge selector for second time - node1.query("OPTIMIZE TABLE test_ttl FINAL") + node1.query(f"OPTIMIZE TABLE {table} FINAL") - assert TSV(node1.query("SELECT * FROM test_ttl")) == TSV("") - assert TSV(node2.query("SELECT * FROM test_ttl")) == TSV("") + assert TSV(node1.query(f"SELECT * FROM {table}")) == TSV("") + assert TSV(node2.query(f"SELECT * FROM {table}")) == TSV("") def test_modify_ttl(started_cluster): - drop_table([node1, node2], "test_ttl") + table = f"test_modify_ttl_{node1.name}_{node2.name}" for node in [node1, node2]: node.query( """ - CREATE TABLE test_ttl(d DateTime, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_modify', '{replica}') + CREATE TABLE {table}(d DateTime, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/{table}', '{replica}') ORDER BY id """.format( - replica=node.name + table=table, replica=node.name ) ) node1.query( - "INSERT INTO test_ttl VALUES (now() - INTERVAL 5 HOUR, 1), (now() - INTERVAL 3 HOUR, 2), (now() - INTERVAL 1 HOUR, 3)" + f"INSERT INTO {table} VALUES (now() - INTERVAL 5 HOUR, 1), (now() - INTERVAL 3 HOUR, 2), (now() - INTERVAL 1 HOUR, 3)" ) - node2.query("SYSTEM SYNC REPLICA test_ttl", timeout=20) + node2.query(f"SYSTEM SYNC REPLICA {table}", timeout=20) node1.query( - "ALTER TABLE test_ttl MODIFY TTL d + INTERVAL 4 HOUR SETTINGS replication_alter_partitions_sync = 2" + f"ALTER TABLE {table} MODIFY TTL d + INTERVAL 4 HOUR SETTINGS replication_alter_partitions_sync = 2" ) - assert node2.query("SELECT id FROM test_ttl") == "2\n3\n" + assert node2.query(f"SELECT id FROM {table}") == "2\n3\n" node2.query( - "ALTER TABLE test_ttl MODIFY TTL d + INTERVAL 2 HOUR SETTINGS replication_alter_partitions_sync = 2" + f"ALTER TABLE {table} MODIFY TTL d + INTERVAL 2 HOUR SETTINGS replication_alter_partitions_sync = 2" ) - assert node1.query("SELECT id FROM test_ttl") == "3\n" + assert node1.query(f"SELECT id FROM {table}") == "3\n" 
node1.query( - "ALTER TABLE test_ttl MODIFY TTL d + INTERVAL 30 MINUTE SETTINGS replication_alter_partitions_sync = 2" + f"ALTER TABLE {table} MODIFY TTL d + INTERVAL 30 MINUTE SETTINGS replication_alter_partitions_sync = 2" ) - assert node2.query("SELECT id FROM test_ttl") == "" + assert node2.query(f"SELECT id FROM {table}") == "" def test_modify_column_ttl(started_cluster): - drop_table([node1, node2], "test_ttl") + table = f"test_modify_column_ttl_{node1.name}_{node2.name}" for node in [node1, node2]: node.query( """ - CREATE TABLE test_ttl(d DateTime, id UInt32 DEFAULT 42) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_column', '{replica}') + CREATE TABLE {table}(d DateTime, id UInt32 DEFAULT 42) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/{table}', '{replica}') ORDER BY d """.format( - replica=node.name + table=table, replica=node.name ) ) node1.query( - "INSERT INTO test_ttl VALUES (now() - INTERVAL 5 HOUR, 1), (now() - INTERVAL 3 HOUR, 2), (now() - INTERVAL 1 HOUR, 3)" + f"INSERT INTO {table} VALUES (now() - INTERVAL 5 HOUR, 1), (now() - INTERVAL 3 HOUR, 2), (now() - INTERVAL 1 HOUR, 3)" ) - node2.query("SYSTEM SYNC REPLICA test_ttl", timeout=20) + node2.query(f"SYSTEM SYNC REPLICA {table}", timeout=20) node1.query( - "ALTER TABLE test_ttl MODIFY COLUMN id UInt32 TTL d + INTERVAL 4 HOUR SETTINGS replication_alter_partitions_sync = 2" + f"ALTER TABLE {table} MODIFY COLUMN id UInt32 TTL d + INTERVAL 4 HOUR SETTINGS replication_alter_partitions_sync = 2" ) - assert node2.query("SELECT id FROM test_ttl") == "42\n2\n3\n" + assert node2.query(f"SELECT id FROM {table}") == "42\n2\n3\n" node1.query( - "ALTER TABLE test_ttl MODIFY COLUMN id UInt32 TTL d + INTERVAL 2 HOUR SETTINGS replication_alter_partitions_sync = 2" + f"ALTER TABLE {table} MODIFY COLUMN id UInt32 TTL d + INTERVAL 2 HOUR SETTINGS replication_alter_partitions_sync = 2" ) - assert node1.query("SELECT id FROM test_ttl") == "42\n42\n3\n" + assert node1.query(f"SELECT id FROM {table}") == "42\n42\n3\n" node1.query( - "ALTER TABLE test_ttl MODIFY COLUMN id UInt32 TTL d + INTERVAL 30 MINUTE SETTINGS replication_alter_partitions_sync = 2" + f"ALTER TABLE {table} MODIFY COLUMN id UInt32 TTL d + INTERVAL 30 MINUTE SETTINGS replication_alter_partitions_sync = 2" ) - assert node2.query("SELECT id FROM test_ttl") == "42\n42\n42\n" + assert node2.query(f"SELECT id FROM {table}") == "42\n42\n42\n" def test_ttl_double_delete_rule_returns_error(started_cluster): - drop_table([node1, node2], "test_ttl") + table = "test_ttl_double_delete_rule_returns_error" try: node1.query( """ - CREATE TABLE test_ttl(date DateTime, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_double_delete', '{replica}') + CREATE TABLE {table}(date DateTime, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/{table}', '{replica}') ORDER BY id PARTITION BY toDayOfMonth(date) TTL date + INTERVAL 1 DAY, date + INTERVAL 2 DAY SETTINGS merge_with_ttl_timeout=0, max_merge_selecting_sleep_ms=6000 """.format( - replica=node1.name + table=table, replica=node1.name ) ) assert False @@ -364,7 +354,6 @@ def test_ttl_alter_delete(started_cluster, name, engine): for a table that has TTL delete expression defined but no explicit storage policy assigned. 
""" - drop_table([node1], name) node1.query( """ @@ -426,7 +415,6 @@ def test_ttl_alter_delete(started_cluster, name, engine): def test_ttl_empty_parts(started_cluster): - drop_table([node1, node2], "test_ttl_empty_parts") for node in [node1, node2]: node.query( """ @@ -519,65 +507,59 @@ def test_ttl_empty_parts(started_cluster): [(node1, node2, 0), (node3, node4, 1), (node5, node6, 2)], ) def test_ttl_compatibility(started_cluster, node_left, node_right, num_run): - drop_table([node_left, node_right], "test_ttl_delete") - drop_table([node_left, node_right], "test_ttl_group_by") - drop_table([node_left, node_right], "test_ttl_where") - + table = f"test_ttl_compatibility_{node_left.name}_{node_right.name}_{num_run}" for node in [node_left, node_right]: node.query( """ - CREATE TABLE test_ttl_delete(date DateTime, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_delete_{suff}', '{replica}') + CREATE TABLE {table}_delete(date DateTime, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/{table}_delete', '{replica}') ORDER BY id PARTITION BY toDayOfMonth(date) TTL date + INTERVAL 3 SECOND - SETTINGS max_number_of_merges_with_ttl_in_pool=100, max_replicated_merges_with_ttl_in_queue=100 """.format( - suff=num_run, replica=node.name + table=table, replica=node.name ) ) node.query( """ - CREATE TABLE test_ttl_group_by(date DateTime, id UInt32, val UInt64) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_group_by_{suff}', '{replica}') + CREATE TABLE {table}_group_by(date DateTime, id UInt32, val UInt64) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/{table}_group_by', '{replica}') ORDER BY id PARTITION BY toDayOfMonth(date) TTL date + INTERVAL 3 SECOND GROUP BY id SET val = sum(val) - SETTINGS max_number_of_merges_with_ttl_in_pool=100, max_replicated_merges_with_ttl_in_queue=100 """.format( - suff=num_run, replica=node.name + table=table, replica=node.name ) ) node.query( """ - CREATE TABLE test_ttl_where(date DateTime, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_where_{suff}', '{replica}') + CREATE TABLE {table}_where(date DateTime, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/{table}_where', '{replica}') ORDER BY id PARTITION BY toDayOfMonth(date) TTL date + INTERVAL 3 SECOND DELETE WHERE id % 2 = 1 - SETTINGS max_number_of_merges_with_ttl_in_pool=100, max_replicated_merges_with_ttl_in_queue=100 """.format( - suff=num_run, replica=node.name + table=table, replica=node.name ) ) - node_left.query("INSERT INTO test_ttl_delete VALUES (now(), 1)") + node_left.query(f"INSERT INTO {table}_delete VALUES (now(), 1)") node_left.query( - "INSERT INTO test_ttl_delete VALUES (toDateTime('2100-10-11 10:00:00'), 2)" + f"INSERT INTO {table}_delete VALUES (toDateTime('2100-10-11 10:00:00'), 2)" ) - node_right.query("INSERT INTO test_ttl_delete VALUES (now(), 3)") + node_right.query(f"INSERT INTO {table}_delete VALUES (now(), 3)") node_right.query( - "INSERT INTO test_ttl_delete VALUES (toDateTime('2100-10-11 10:00:00'), 4)" + f"INSERT INTO {table}_delete VALUES (toDateTime('2100-10-11 10:00:00'), 4)" ) - node_left.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 1)") - node_left.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 2)") - node_right.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 3)") - node_right.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 4)") + node_left.query(f"INSERT INTO {table}_group_by VALUES (now(), 0, 1)") + node_left.query(f"INSERT INTO 
{table}_group_by VALUES (now(), 0, 2)") + node_right.query(f"INSERT INTO {table}_group_by VALUES (now(), 0, 3)") + node_right.query(f"INSERT INTO {table}_group_by VALUES (now(), 0, 4)") - node_left.query("INSERT INTO test_ttl_where VALUES (now(), 1)") - node_left.query("INSERT INTO test_ttl_where VALUES (now(), 2)") - node_right.query("INSERT INTO test_ttl_where VALUES (now(), 3)") - node_right.query("INSERT INTO test_ttl_where VALUES (now(), 4)") + node_left.query(f"INSERT INTO {table}_where VALUES (now(), 1)") + node_left.query(f"INSERT INTO {table}_where VALUES (now(), 2)") + node_right.query(f"INSERT INTO {table}_where VALUES (now(), 3)") + node_right.query(f"INSERT INTO {table}_where VALUES (now(), 4)") if node_left.with_installed_binary: node_left.restart_with_latest_version() @@ -588,13 +570,13 @@ def test_ttl_compatibility(started_cluster, node_left, node_right, num_run): time.sleep(5) # Wait for TTL # after restart table can be in readonly mode - exec_query_with_retry(node_right, "OPTIMIZE TABLE test_ttl_delete FINAL") - node_right.query("OPTIMIZE TABLE test_ttl_group_by FINAL") - node_right.query("OPTIMIZE TABLE test_ttl_where FINAL") + exec_query_with_retry(node_right, f"OPTIMIZE TABLE {table}_delete FINAL") + node_right.query(f"OPTIMIZE TABLE {table}_group_by FINAL") + node_right.query(f"OPTIMIZE TABLE {table}_where FINAL") - exec_query_with_retry(node_left, "OPTIMIZE TABLE test_ttl_delete FINAL") - node_left.query("OPTIMIZE TABLE test_ttl_group_by FINAL", timeout=20) - node_left.query("OPTIMIZE TABLE test_ttl_where FINAL", timeout=20) + exec_query_with_retry(node_left, f"OPTIMIZE TABLE {table}_delete FINAL") + node_left.query(f"OPTIMIZE TABLE {table}_group_by FINAL", timeout=20) + node_left.query(f"OPTIMIZE TABLE {table}_where FINAL", timeout=20) # After OPTIMIZE TABLE, it is not guaranteed that everything is merged. # Possible scenario (for test_ttl_group_by): @@ -605,19 +587,19 @@ def test_ttl_compatibility(started_cluster, node_left, node_right, num_run): # 4. OPTIMIZE FINAL does nothing, cause there is an entry for 0_3 # # So, let's also sync replicas for node_right (for now). 
- exec_query_with_retry(node_right, "SYSTEM SYNC REPLICA test_ttl_delete") - node_right.query("SYSTEM SYNC REPLICA test_ttl_group_by", timeout=20) - node_right.query("SYSTEM SYNC REPLICA test_ttl_where", timeout=20) + exec_query_with_retry(node_right, f"SYSTEM SYNC REPLICA {table}_delete") + node_right.query(f"SYSTEM SYNC REPLICA {table}_group_by", timeout=20) + node_right.query(f"SYSTEM SYNC REPLICA {table}_where", timeout=20) - exec_query_with_retry(node_left, "SYSTEM SYNC REPLICA test_ttl_delete") - node_left.query("SYSTEM SYNC REPLICA test_ttl_group_by", timeout=20) - node_left.query("SYSTEM SYNC REPLICA test_ttl_where", timeout=20) + exec_query_with_retry(node_left, f"SYSTEM SYNC REPLICA {table}_delete") + node_left.query(f"SYSTEM SYNC REPLICA {table}_group_by", timeout=20) + node_left.query(f"SYSTEM SYNC REPLICA {table}_where", timeout=20) - assert node_left.query("SELECT id FROM test_ttl_delete ORDER BY id") == "2\n4\n" - assert node_right.query("SELECT id FROM test_ttl_delete ORDER BY id") == "2\n4\n" + assert node_left.query(f"SELECT id FROM {table}_delete ORDER BY id") == "2\n4\n" + assert node_right.query(f"SELECT id FROM {table}_delete ORDER BY id") == "2\n4\n" - assert node_left.query("SELECT val FROM test_ttl_group_by ORDER BY id") == "10\n" - assert node_right.query("SELECT val FROM test_ttl_group_by ORDER BY id") == "10\n" + assert node_left.query(f"SELECT val FROM {table}_group_by ORDER BY id") == "10\n" + assert node_right.query(f"SELECT val FROM {table}_group_by ORDER BY id") == "10\n" - assert node_left.query("SELECT id FROM test_ttl_where ORDER BY id") == "2\n4\n" - assert node_right.query("SELECT id FROM test_ttl_where ORDER BY id") == "2\n4\n" + assert node_left.query(f"SELECT id FROM {table}_where ORDER BY id") == "2\n4\n" + assert node_right.query(f"SELECT id FROM {table}_where ORDER BY id") == "2\n4\n" From f2f84fe6b7f49ae3bad1ed6f8c19c608f73d53d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 31 Oct 2023 18:24:33 +0100 Subject: [PATCH 0147/1097] Adapt version changes --- .../test_vertical_merges_from_compact_parts.py | 2 +- tests/integration/test_default_compression_codec/test.py | 2 +- .../integration/test_version_update_after_mutation/test.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py index e0a9b5ebad6..9c9d1a4d312 100644 --- a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py +++ b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py @@ -7,7 +7,7 @@ cluster = ClickHouseCluster(__file__) node_old = cluster.add_instance( "node1", image="clickhouse/clickhouse-server", - tag="22.6", + tag="22.8", stay_alive=True, with_installed_binary=True, with_zookeeper=True, diff --git a/tests/integration/test_default_compression_codec/test.py b/tests/integration/test_default_compression_codec/test.py index db116ff42f3..ffe22c62325 100644 --- a/tests/integration/test_default_compression_codec/test.py +++ b/tests/integration/test_default_compression_codec/test.py @@ -27,7 +27,7 @@ node2 = cluster.add_instance( ) node3 = cluster.add_instance( "node3", - main_configs=["configs/default_compression.xml", "configs/wide_parts_only.xml"], + main_configs=["configs/default_compression.xml"], image="yandex/clickhouse-server", tag="19.16.9.37", stay_alive=True, diff --git 
a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index 9fb396b1c14..4e84b4c10ca 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -72,8 +72,8 @@ def test_mutate_and_upgrade(start_cluster): node1.query("DETACH TABLE mt") # stop being leader node1.query("SYSTEM FLUSH LOGS") node2.query("SYSTEM FLUSH LOGS") - node1.restart_with_latest_version(signal=9, fix_metadata=True) - node2.restart_with_latest_version(signal=9, fix_metadata=True) + node1.restart_with_latest_version(signal=9, fix_metadata=False) + node2.restart_with_latest_version(signal=9, fix_metadata=False) # After hard restart table can be in readonly mode exec_query_with_retry( @@ -129,7 +129,7 @@ def test_upgrade_while_mutation(start_cluster): # (We could be in process of creating some system table, which will leave empty directory on restart, # so when we start moving system tables from ordinary to atomic db, it will complain about some undeleted files) node3.query("SYSTEM FLUSH LOGS") - node3.restart_with_latest_version(signal=9, fix_metadata=True) + node3.restart_with_latest_version(signal=9, fix_metadata=False) # checks for readonly exec_query_with_retry(node3, "OPTIMIZE TABLE mt1", sleep_time=5, retry_count=60) From b27658742223e750902ec3f181d2a662fc7bba1f Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 1 Nov 2023 15:43:20 +0800 Subject: [PATCH 0148/1097] fix failed uts --- .../Impl/NativeORCBlockInputFormat.cpp | 130 ++++++++++++------ 1 file changed, 90 insertions(+), 40 deletions(-) diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index fd0f4ee0ca0..2c3db6432c8 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -100,11 +100,11 @@ std::unique_ptr asORCInputStreamLoadIntoMemory(ReadBuffer & in return std::make_unique(std::move(file_data), file_size); } -static const orc::Type * getORCTypeByName(const orc::Type & schema, const String & name, bool case_insensitive_column_matching) +static const orc::Type * getORCTypeByName(const orc::Type & schema, const String & name, bool ignore_case) { for (UInt64 i = 0; i != schema.getSubtypeCount(); ++i) if (boost::equals(schema.getFieldName(i), name) - || (case_insensitive_column_matching && boost::iequals(schema.getFieldName(i), name))) + || (ignore_case && boost::iequals(schema.getFieldName(i), name))) return schema.getSubtype(i); return nullptr; } @@ -690,10 +690,10 @@ static std::string toDotColumnPath(const std::vector & columns) return column_path.substr(0, column_path.length() - 1); } -static void buildORCTypeNameIdMap( +[[maybe_unused]] static void buildORCTypeNameIdMap( const orc::Type * orc_type, std::vector & columns, - bool case_insensitive_column_matching, + bool ignore_case, std::map & id_type_map, std::map & name_id_map) { @@ -705,10 +705,10 @@ static void buildORCTypeNameIdMap( const std::string & field_name = orc_type->getFieldName(i); columns.push_back(field_name); auto column_path = toDotColumnPath(columns); - if (case_insensitive_column_matching) + if (ignore_case) boost::to_lower(column_path); name_id_map[column_path] = orc_type->getSubtype(i)->getColumnId(); - buildORCTypeNameIdMap(orc_type->getSubtype(i), columns, case_insensitive_column_matching, id_type_map, name_id_map); + 
buildORCTypeNameIdMap(orc_type->getSubtype(i), columns, ignore_case, id_type_map, name_id_map); columns.pop_back(); } } @@ -716,7 +716,7 @@ static void buildORCTypeNameIdMap( { // other non-primitive type for (size_t j = 0; j < orc_type->getSubtypeCount(); ++j) - buildORCTypeNameIdMap(orc_type->getSubtype(j), columns, case_insensitive_column_matching, id_type_map, name_id_map); + buildORCTypeNameIdMap(orc_type->getSubtype(j), columns, ignore_case, id_type_map, name_id_map); } } @@ -747,10 +747,54 @@ static void getFileReaderAndSchema( } } -static void updateIncludeTypeIds( - DataTypePtr type, const orc::Type * orc_type, bool case_insensitive_column_matching, std::unordered_set & include_typeids) +static const orc::Type * traverseDownORCTypeByName( + const std::string & target, + const orc::Type * orc_type, + DataTypePtr & type, + bool ignore_case) { - /// Primitive types + // std::cout << "target:" << target << ", orc_type:" << orc_type->toString() << ", type:" << type->getName() << std::endl; + if (target.empty()) + return orc_type; + + auto split = Nested::splitName(target); + if (orc::STRUCT == orc_type->getKind()) + { + const auto * orc_field_type = getORCTypeByName(*orc_type, split.first, ignore_case); + return orc_field_type ? traverseDownORCTypeByName(split.second, orc_field_type, type, ignore_case) : nullptr; + } + else if (orc::LIST == orc_type->getKind()) + { + /// For cases in which header contains subcolumns flattened from nested columns. + /// For example, "a Nested(x String, y Int64)" is flattened to "a.x Array(String), a.y Array(Int64)", and orc file schema is still "a array>". + /// In this case, we should skip possible array type and traverse down to its nested struct type. + const auto * array_type = typeid_cast(removeNullable(type).get()); + const auto * orc_nested_type = orc_type->getSubtype(0); + if (array_type && orc::STRUCT == orc_nested_type->getKind()) + { + const auto * orc_field_type = getORCTypeByName(*orc_nested_type, split.first, ignore_case); + if (orc_field_type) + { + /// Avoid inconsistency between CH and ORC type brought by flattened Nested type. 
+ type = array_type->getNestedType(); + return traverseDownORCTypeByName(split.second, orc_field_type, type, ignore_case); + } + else + return nullptr; + } + else + return nullptr; + } + else + return nullptr; +} + +static void updateIncludeTypeIds( + DataTypePtr type, const orc::Type * orc_type, bool ignore_case, std::unordered_set & include_typeids) +{ + // std::cout << "ch type:" << type->getName() << ", orc_type:" << orc_type->toString() << std::endl; + + /// For primitive types, directly append column id into result if (orc_type->getSubtypeCount() == 0) { include_typeids.insert(orc_type->getColumnId()); @@ -765,7 +809,7 @@ static void updateIncludeTypeIds( if (array_type) { updateIncludeTypeIds( - array_type->getNestedType(), orc_type->getSubtype(0), case_insensitive_column_matching, include_typeids); + array_type->getNestedType(), orc_type->getSubtype(0), ignore_case, include_typeids); } return; } @@ -773,8 +817,8 @@ static void updateIncludeTypeIds( const auto * map_type = typeid_cast(non_nullable_type.get()); if (map_type) { - updateIncludeTypeIds(map_type->getKeyType(), orc_type->getSubtype(0), case_insensitive_column_matching, include_typeids); - updateIncludeTypeIds(map_type->getValueType(), orc_type->getSubtype(1), case_insensitive_column_matching, include_typeids); + updateIncludeTypeIds(map_type->getKeyType(), orc_type->getSubtype(0), ignore_case, include_typeids); + updateIncludeTypeIds(map_type->getValueType(), orc_type->getSubtype(1), ignore_case, include_typeids); } return; } @@ -791,12 +835,12 @@ static void updateIncludeTypeIds( for (size_t struct_i = 0; struct_i < orc_type->getSubtypeCount(); ++struct_i) { if (boost::equals(orc_type->getFieldName(struct_i), name) - || (case_insensitive_column_matching && boost::iequals(orc_type->getFieldName(struct_i), name))) + || (ignore_case && boost::iequals(orc_type->getFieldName(struct_i), name))) { updateIncludeTypeIds( tuple_type->getElement(tuple_i), orc_type->getSubtype(struct_i), - case_insensitive_column_matching, + ignore_case, include_typeids); break; } @@ -807,7 +851,7 @@ static void updateIncludeTypeIds( { for (size_t i = 0; i < tuple_type->getElements().size() && i < orc_type->getSubtypeCount(); ++i) updateIncludeTypeIds( - tuple_type->getElement(i), orc_type->getSubtype(i), case_insensitive_column_matching, include_typeids); + tuple_type->getElement(i), orc_type->getSubtype(i), ignore_case, include_typeids); } } return; @@ -838,40 +882,47 @@ void NativeORCBlockInputFormat::prepareFileReader() format_settings.null_as_default, format_settings.orc.case_insensitive_column_matching); - const bool ignore_case = format_settings.orc.case_insensitive_column_matching; - std::vector columns; - std::map id_type_map; - std::map name_id_map; - buildORCTypeNameIdMap(&file_reader->getType(), columns, ignore_case, id_type_map, name_id_map); - - // std::cout << "subtypes:" << file_reader->getType().getSubtypeCount() << std::endl; - // std::cout << "id type map" << std::endl; - // for (const auto & [k, v]: id_type_map) - // std::cout << "id:" << k << ", type:" << v->toString() << std::endl; - // std::cout << "name id map" << std::endl; - // for (const auto & [k, v]: name_id_map) - // std::cout << "name:" << k << ", id:" << v << std::endl; const auto & header = getPort().getHeader(); + const auto & file_schema = file_reader->getType(); std::unordered_set include_typeids; for (const auto & column : header) { - auto name = column.name; - if (ignore_case) - boost::to_lower(name); - - if (name_id_map.contains(name)) + auto split = 
Nested::splitName(column.name); + if (split.second.empty()) { - auto id = name_id_map[name]; - if (id_type_map.contains(id)) - { - updateIncludeTypeIds(column.type, id_type_map[id], ignore_case, include_typeids); - } + const auto * orc_type = getORCTypeByName(file_schema, column.name, ignore_case); + updateIncludeTypeIds(column.type, orc_type, ignore_case, include_typeids); + } + else + { + auto type = column.type; + const auto * orc_type = traverseDownORCTypeByName(column.name, &file_schema, type, ignore_case); + if (orc_type) + updateIncludeTypeIds(type, orc_type, ignore_case, include_typeids); } } include_indices.assign(include_typeids.begin(), include_typeids.end()); + /// Just for Debug + // std::vector tmp; + // std::map id_type_map; + // std::map name_id_map; + // buildORCTypeNameIdMap(&file_schema, tmp, ignore_case, id_type_map, name_id_map); + // std::cout << "just for debug:" << std::endl; + // std::cout << "subtypes:" << file_reader->getType().getSubtypeCount() << std::endl; + // std::cout << "ch output type:" << getPort().getHeader().dumpStructure() << std::endl; + // std::cout << "orc ouput type:" << file_reader->getType().toString() << std::endl; + // std::cout << "id type map" << std::endl; + // for (const auto & [k, v] : id_type_map) + // std::cout << "id:" << k << ", type:" << v->toString() << std::endl; + // std::cout << "name id map" << std::endl; + // for (const auto & [k, v] : name_id_map) + // std::cout << "name:" << k << ", id:" << v << std::endl; + // for (const auto & x : include_indices) + // std::cout << "choose " << x << std::endl; + if (format_settings.orc.filter_push_down && key_condition && !sarg) { sarg = buildORCSearchArgument(*key_condition, getPort().getHeader(), file_reader->getType(), format_settings); @@ -951,7 +1002,6 @@ Chunk NativeORCBlockInputFormat::generate() Chunk res; size_t num_rows = batch->numElements; const auto & schema = stripe_reader->getSelectedType(); - // std::cout << "output schema:" << schema.toString() << std::endl; orc_column_to_ch_column->orcTableToCHChunk(res, &schema, batch.get(), num_rows, &block_missing_values); approx_bytes_read_for_chunk = num_rows * current_stripe_info->getLength() / current_stripe_info->getNumberOfRows(); From 001cbe79126ea96b8f70ae3c5e17655c73df30cd Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 1 Nov 2023 16:58:25 +0800 Subject: [PATCH 0149/1097] fix typos --- src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 2c3db6432c8..3f98224f8aa 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -913,7 +913,7 @@ void NativeORCBlockInputFormat::prepareFileReader() // std::cout << "just for debug:" << std::endl; // std::cout << "subtypes:" << file_reader->getType().getSubtypeCount() << std::endl; // std::cout << "ch output type:" << getPort().getHeader().dumpStructure() << std::endl; - // std::cout << "orc ouput type:" << file_reader->getType().toString() << std::endl; + // std::cout << "orc output type:" << file_reader->getType().toString() << std::endl; // std::cout << "id type map" << std::endl; // for (const auto & [k, v] : id_type_map) // std::cout << "id:" << k << ", type:" << v->toString() << std::endl; From 1ae951efc2aec7b3fc236cc16fa8272af1d5e515 Mon Sep 17 00:00:00 2001 From: Zhiguo Zhou Date: Wed, 25 
Oct 2023 21:41:32 +0800 Subject: [PATCH 0150/1097] Add unittest MergeTree.CombineFilters This commit adds a unittest for validating DB::combineFilters. --- .../MergeTree/tests/gtest_combine_filters.cpp | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 src/Storages/MergeTree/tests/gtest_combine_filters.cpp diff --git a/src/Storages/MergeTree/tests/gtest_combine_filters.cpp b/src/Storages/MergeTree/tests/gtest_combine_filters.cpp new file mode 100644 index 00000000000..64cccd4cbad --- /dev/null +++ b/src/Storages/MergeTree/tests/gtest_combine_filters.cpp @@ -0,0 +1,66 @@ +#include +#include + +// I know that inclusion of .cpp is not good at all +#include // NOLINT + +using namespace DB; + +/* The combineFilters function from MergeTreeRangeReader.cpp could be optimized with Intel's AVX512VBMI2 intrinsic, + * _mm512_mask_expandloadu_epi8. And this test is added to ensure that the vectorized code outputs the exact results + * as the original scalar code when the required hardware feature is supported on the device. + * + * To avoid the contingency of the all-one/all-zero sequences, this test fills in the filters with alternating 1s and + * 0s so that only the 4i-th (i is a non-negative integer) elements in the combined filter equals 1s and others are 0s. + * For example, given the size of the first filter to be 11, the generated and the output filters are: + * + * first_filter: [1 0 1 0 1 0 1 0 1 0 1] + * second_filter: [1 0 1 0 1 0] + * output_filter: [1 0 0 0 1 0 0 0 1 0 0] + */ +bool testCombineFilters(size_t size) +{ + auto generateFilterWithAlternatingOneAndZero = [](size_t len)->ColumnPtr + { + auto filter = ColumnUInt8::create(len, 0); + auto & filter_data = filter->getData(); + + for (size_t i = 0; i < len; i += 2) + filter_data[i] = 1; + + return filter; + }; + + auto first_filter = generateFilterWithAlternatingOneAndZero(size); + /// The count of 1s in the first_filter is floor((size + 1) / 2), which should be the size of the second_filter. + auto second_filter = generateFilterWithAlternatingOneAndZero((size + 1) / 2); + + auto result = combineFilters(first_filter, second_filter); + + if (result->size() != size) return false; + + for (size_t i = 0; i < size; i++) + { + if (i % 4 == 0) + { + if (result->get64(i) != 1) return false; + } + else + { + if (result->get64(i) != 0) return false; + } + } + + return true; +} + +TEST(MergeTree, CombineFilters) +{ + EXPECT_TRUE(testCombineFilters(1)); + EXPECT_TRUE(testCombineFilters(2)); + EXPECT_TRUE(testCombineFilters(63)); + EXPECT_TRUE(testCombineFilters(64)); + EXPECT_TRUE(testCombineFilters(65)); + EXPECT_TRUE(testCombineFilters(200)); + EXPECT_TRUE(testCombineFilters(201)); +} From b66a9e8fd34eae519dee1f3120cf2a4082b602c7 Mon Sep 17 00:00:00 2001 From: Zhiguo Zhou Date: Fri, 27 Oct 2023 16:18:30 +0800 Subject: [PATCH 0151/1097] Optimize DB::combineFilters with AVX512_VBMI2 intrinsic The DB::combineFilters combines two UInt8 columns (filters) by scanning the first column for non-zero elements and replacing them with elements in the second column contiguously. This function is optimized with Intel's AVX512_VBMI2 intrinsic, _mm512_mask_expandloadu_epi8, which is designed for implementing the exact operation when the bits in the writemask are set when the corresponding packed integers in the first filter are non-zero, and are not set otherwise. The dynamic dispatch ensures that this optimization only applies to devices with AVX512_VBMI2 support. 
--- .../MergeTree/MergeTreeRangeReader.cpp | 47 +++++++++++++++++-- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 21e526778e1..84e3c10eace 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -18,6 +18,10 @@ #include #endif +#if USE_MULTITARGET_CODE +#include +#endif + #if defined(__aarch64__) && defined(__ARM_NEON) # include # pragma clang diagnostic ignored "-Wreserved-identifier" @@ -1253,6 +1257,32 @@ static void checkCombinedFiltersSize(size_t bytes_in_first_filter, size_t second "does not match second filter size ({})", bytes_in_first_filter, second_filter_size); } +DECLARE_AVX512VBMI2_SPECIFIC_CODE( +inline void combineFiltersImpl(UInt8 * first_begin, const UInt8 * first_end, const UInt8 * second_begin) +{ + constexpr size_t AVX512_VEC_SIZE_IN_BYTES = 64; + + while (first_begin + AVX512_VEC_SIZE_IN_BYTES <= first_end) + { + UInt64 mask = bytes64MaskToBits64Mask(first_begin); + __m512i src = _mm512_loadu_si512(reinterpret_cast(first_begin)); + __m512i dst = _mm512_mask_expandloadu_epi8(src, static_cast<__mmask64>(mask), reinterpret_cast(second_begin)); + _mm512_storeu_si512(reinterpret_cast(first_begin), dst); + + first_begin += AVX512_VEC_SIZE_IN_BYTES; + second_begin += std::popcount(mask); + } + + for (/* empty */; first_begin < first_end; ++first_begin) + { + if (*first_begin) + { + *first_begin = *second_begin++; + } + } +} +) + /// Second filter size must be equal to number of 1s in the first filter. /// The result has size equal to first filter size and contains 1s only where both filters contain 1s. static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second) @@ -1295,12 +1325,21 @@ static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second) auto & first_data = typeid_cast(mut_first.get())->getData(); const auto * second_data = second_descr.data->data(); - for (auto & val : first_data) +#if USE_MULTITARGET_CODE + if (isArchSupported(TargetArch::AVX512VBMI2)) { - if (val) + TargetSpecific::AVX512VBMI2::combineFiltersImpl(first_data.begin(), first_data.end(), second_data); + } + else +#endif + { + for (auto & val : first_data) { - val = *second_data; - ++second_data; + if (val) + { + val = *second_data; + ++second_data; + } } } From 24c45a4ee060c1fb2447d8acb4f0281f97ab65f1 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 1 Nov 2023 18:47:11 +0800 Subject: [PATCH 0152/1097] fix failed uts --- .../Impl/NativeORCBlockInputFormat.cpp | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 3f98224f8aa..7a835274bb2 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -775,7 +775,7 @@ static const orc::Type * traverseDownORCTypeByName( const auto * orc_field_type = getORCTypeByName(*orc_nested_type, split.first, ignore_case); if (orc_field_type) { - /// Avoid inconsistency between CH and ORC type brought by flattened Nested type. + /// Adjust CH type to avoid inconsistency between CH and ORC type brought by flattened Nested type. 
type = array_type->getNestedType(); return traverseDownORCTypeByName(split.second, orc_field_type, type, ignore_case); } @@ -883,25 +883,15 @@ void NativeORCBlockInputFormat::prepareFileReader() format_settings.orc.case_insensitive_column_matching); const bool ignore_case = format_settings.orc.case_insensitive_column_matching; - const auto & header = getPort().getHeader(); const auto & file_schema = file_reader->getType(); std::unordered_set include_typeids; for (const auto & column : header) { - auto split = Nested::splitName(column.name); - if (split.second.empty()) - { - const auto * orc_type = getORCTypeByName(file_schema, column.name, ignore_case); - updateIncludeTypeIds(column.type, orc_type, ignore_case, include_typeids); - } - else - { - auto type = column.type; - const auto * orc_type = traverseDownORCTypeByName(column.name, &file_schema, type, ignore_case); - if (orc_type) - updateIncludeTypeIds(type, orc_type, ignore_case, include_typeids); - } + auto adjusted_type = column.type; + const auto * orc_type = traverseDownORCTypeByName(column.name, &file_schema, adjusted_type, ignore_case); + if (orc_type) + updateIncludeTypeIds(adjusted_type, orc_type, ignore_case, include_typeids); } include_indices.assign(include_typeids.begin(), include_typeids.end()); From dc897215dacea4f447b127254914e32f76ca001e Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 1 Nov 2023 20:42:07 +0800 Subject: [PATCH 0153/1097] fix failed uts tests/queries/0_stateless/02312_parquet_orc_arrow_names_tuples.sql --- .../Impl/NativeORCBlockInputFormat.cpp | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 7a835274bb2..9501efbabb7 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -757,11 +757,32 @@ static const orc::Type * traverseDownORCTypeByName( if (target.empty()) return orc_type; - auto split = Nested::splitName(target); + auto search_struct_field = [&](const std::string & target_, const orc::Type * type_) -> std::pair + { + auto target_copy = target_; + if (ignore_case) + boost::to_lower(target_copy); + + for (size_t i = 0; i < type_->getSubtypeCount(); ++i) + { + auto field_name = type_->getFieldName(i); + if (ignore_case) + boost::to_lower(field_name); + + if (startsWith(target_copy, field_name) && (target_copy.size() == field_name.size() || target_copy[field_name.size()] == '.')) + { + return {target_copy.size() == field_name.size() ? "" : target_.substr(field_name.size() + 1), type_->getSubtype(i)}; + } + } + return {"", nullptr}; + }; + if (orc::STRUCT == orc_type->getKind()) { - const auto * orc_field_type = getORCTypeByName(*orc_type, split.first, ignore_case); - return orc_field_type ? traverseDownORCTypeByName(split.second, orc_field_type, type, ignore_case) : nullptr; + auto next_type_and_target = search_struct_field(target, orc_type); + const auto & next_target = next_type_and_target.first; + const auto * next_orc_type = next_type_and_target.second; + return next_orc_type ? 
traverseDownORCTypeByName(next_target, next_orc_type, type, ignore_case) : nullptr; } else if (orc::LIST == orc_type->getKind()) { @@ -772,12 +793,14 @@ static const orc::Type * traverseDownORCTypeByName( const auto * orc_nested_type = orc_type->getSubtype(0); if (array_type && orc::STRUCT == orc_nested_type->getKind()) { - const auto * orc_field_type = getORCTypeByName(*orc_nested_type, split.first, ignore_case); - if (orc_field_type) + auto next_type_and_target = search_struct_field(target, orc_nested_type); + const auto & next_target = next_type_and_target.first; + const auto * next_orc_type = next_type_and_target.second; + if (next_orc_type) { /// Adjust CH type to avoid inconsistency between CH and ORC type brought by flattened Nested type. type = array_type->getNestedType(); - return traverseDownORCTypeByName(split.second, orc_field_type, type, ignore_case); + return traverseDownORCTypeByName(next_target, next_orc_type, type, ignore_case); } else return nullptr; From 77507b843b66c4994ea46401d12b67f447154a39 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 1 Nov 2023 19:09:43 +0100 Subject: [PATCH 0154/1097] Fix build --- src/Common/ProfileEvents.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index d0589f7d281..31a62d0ff5c 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -415,7 +415,6 @@ The server successfully detected this situation and will download merged part fr M(FilesystemCacheEvictMicroseconds, "Filesystem cache eviction time") \ M(FilesystemCacheGetOrSetMicroseconds, "Filesystem cache getOrSet() time") \ M(FilesystemCacheGetMicroseconds, "Filesystem cache get() time") \ - M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \ M(FileSegmentWaitMicroseconds, "Wait on DOWNLOADING state") \ M(FileSegmentCompleteMicroseconds, "Duration of FileSegment::complete() in filesystem cache") \ M(FileSegmentLockMicroseconds, "Lock file segment time") \ From 09c1e76982ce514e34da81aaff821b5ffcd63753 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 1 Nov 2023 21:42:34 +0100 Subject: [PATCH 0155/1097] Allow manual compaction of rocksdb via OPTIMIZE query Signed-off-by: Azat Khuzhin --- .../RocksDB/StorageEmbeddedRocksDB.cpp | 31 +++++++++++++++++++ src/Storages/RocksDB/StorageEmbeddedRocksDB.h | 10 ++++++ .../02910_rocksdb_optimize.reference | 0 .../0_stateless/02910_rocksdb_optimize.sql | 5 +++ 4 files changed, 46 insertions(+) create mode 100644 tests/queries/0_stateless/02910_rocksdb_optimize.reference create mode 100644 tests/queries/0_stateless/02910_rocksdb_optimize.sql diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 5e8d54bcdf1..e81603d67b4 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -47,6 +47,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ROCKSDB_ERROR; + extern const int NOT_IMPLEMENTED; } using FieldVectorPtr = std::shared_ptr; @@ -310,6 +311,36 @@ void StorageEmbeddedRocksDB::drop() rocksdb_ptr = nullptr; } +bool StorageEmbeddedRocksDB::optimize( + const ASTPtr & /*query*/, + const StorageMetadataPtr & /*metadata_snapshot*/, + const ASTPtr & partition, + bool final, + bool deduplicate, + const Names & /* 
deduplicate_by_columns */, + bool cleanup, + ContextPtr /*context*/) +{ + if (partition) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Partition cannot be specified when optimizing table of type EmbeddedRocksDB"); + + if (final) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "FINAL cannot be specified when optimizing table of type EmbeddedRocksDB"); + + if (deduplicate) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DEDUPLICATE cannot be specified when optimizing table of type EmbeddedRocksDB"); + + if (cleanup) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "CLEANUP cannot be specified when optimizing table of type EmbeddedRocksDB"); + + std::shared_lock lock(rocksdb_ptr_mx); + rocksdb::CompactRangeOptions compact_options; + auto status = rocksdb_ptr->CompactRange(compact_options, nullptr, nullptr); + if (!status.ok()) + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Compaction failed: {}", status.ToString()); + return true; +} + void StorageEmbeddedRocksDB::initDB() { rocksdb::Status status; diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index 336f6a8abe3..9f83f3f2cf0 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -55,6 +55,16 @@ public: void mutate(const MutationCommands &, ContextPtr) override; void drop() override; + bool optimize( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + const ASTPtr & partition, + bool final, + bool deduplicate, + const Names & deduplicate_by_columns, + bool cleanup, + ContextPtr context) override; + bool supportsParallelInsert() const override { return true; } bool supportsIndexForIn() const override { return true; } bool mayBenefitFromIndexForIn( diff --git a/tests/queries/0_stateless/02910_rocksdb_optimize.reference b/tests/queries/0_stateless/02910_rocksdb_optimize.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02910_rocksdb_optimize.sql b/tests/queries/0_stateless/02910_rocksdb_optimize.sql new file mode 100644 index 00000000000..575ba6db212 --- /dev/null +++ b/tests/queries/0_stateless/02910_rocksdb_optimize.sql @@ -0,0 +1,5 @@ +-- Tags: use-rocksdb + +CREATE TABLE dict (key UInt64, value String) ENGINE = EmbeddedRocksDB PRIMARY KEY key; +INSERT INTO dict SELECT number, toString(number) FROM numbers(1e3); +OPTIMIZE TABLE dict; From 1af52d3b0092c0e376075e389d8141e203e5c36b Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 2 Nov 2023 00:42:57 +0000 Subject: [PATCH 0156/1097] Better --- src/Core/Settings.h | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 11 ++++++----- src/Storages/StorageReplicatedMergeTree.cpp | 12 ++++++------ ...8_replicated_merge_tree_creation_failure.sh | 18 +++++++++--------- 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 955ad815e00..213f5575716 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -607,7 +607,7 @@ class IColumn; M(Bool, mutations_execute_subqueries_on_initiator, false, "If true scalar subqueries are executed on initiator and replaced to literals in UPDATE and DELETE queries", 0) \ M(UInt64, mutations_max_literal_size_to_replace, 16384, "The maximum size of serialized literal in bytes to replace in UPDATE and DELETE queries", 0) \ \ - M(Bool, create_replicated_merge_tree_fault_injection, false, "If true, the creation of table will be aborted after creating metadata in ZooKeeper", 0) \ + M(Float, 
create_replicated_merge_tree_fault_injection_probability, 0.0f, "The probability of a fault injection during table creation after creating metadata in ZooKeeper", 0) \ \ M(Bool, use_query_cache, false, "Enable the query cache", 0) \ M(Bool, enable_writes_to_query_cache, true, "Enable storing results of SELECT queries in the query cache", 0) \ diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 3da6c9fa264..c88b1b5f968 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -104,7 +104,6 @@ namespace ErrorCodes extern const int UNKNOWN_STORAGE; extern const int SYNTAX_ERROR; extern const int SUPPORT_IS_DISABLED; - extern const int ABORTED; } namespace fs = std::filesystem; @@ -1443,11 +1442,13 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, "ATTACH ... FROM ... query is not supported for {} table engine, " "because such tables do not store any data on disk. Use CREATE instead.", res->getName()); - if (getContext()->getSettingsRef().create_replicated_merge_tree_fault_injection) + bool is_replicated_storage = typeid_cast(res.get()) != nullptr; + if (is_replicated_storage) { - bool is_replicated_storage = typeid_cast(res.get()) != nullptr; - if (is_replicated_storage) - throw Exception(ErrorCodes::ABORTED, "Shutdown is called for table"); + const auto probability = getContext()->getSettingsRef().create_replicated_merge_tree_fault_injection_probability; + std::bernoulli_distribution fault(probability); + if (fault(thread_local_rng)) + throw Coordination::Exception(Coordination::Error::ZCONNECTIONLOSS, "Fault injected (during table creation)"); } database->createTable(getContext(), create.getTable(), res, query_ptr); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 0781684b7b7..d40e99aae1f 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -836,7 +836,7 @@ bool StorageReplicatedMergeTree::createTableIfNotExists(const StorageMetadataPtr ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/mutation_pointer", "", zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/creator_uuid", toString(ServerUUID::get()), + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/creator_info", toString(getStorageID().getFullTableName()) + "|" + toString(ServerUUID::get()), zkutil::CreateMode::Persistent)); Coordination::Responses responses; @@ -869,7 +869,7 @@ void StorageReplicatedMergeTree::createReplica(const StorageMetadataPtr & metada const String local_metadata = ReplicatedMergeTreeTableMetadata(*this, metadata_snapshot).toString(); const String local_columns = metadata_snapshot->getColumns().toString(); const String local_metadata_version = toString(metadata_snapshot->getMetadataVersion()); - const String creator_uuid = toString(ServerUUID::get()); + const String creator_info = toString(getStorageID().getFullTableName()) + "|" + toString(ServerUUID::get()); /// It is possible for the replica to fail after creating ZK nodes without saving local metadata. /// Because of that we need to check whether the replica exists and is newly created. 
@@ -888,7 +888,7 @@ void StorageReplicatedMergeTree::createReplica(const StorageMetadataPtr & metada replica_path + "/min_unprocessed_insert_time", replica_path + "/max_processed_insert_time", replica_path + "/mutation_pointer", - replica_path + "/creator_uuid" + replica_path + "/creator_info" }; auto response_exists = zookeeper->tryGet(paths_exists); @@ -919,7 +919,7 @@ void StorageReplicatedMergeTree::createReplica(const StorageMetadataPtr & metada const auto & zk_min_unprocessed_insert_time = response_exists[response_num++].data; const auto & zk_max_processed_insert_time = response_exists[response_num++].data; const auto & zk_mutation_pointer = response_exists[response_num++].data; - const auto & zk_creator_uuid = response_exists[response_num++].data; + const auto & zk_creator_info = response_exists[response_num++].data; if (zk_host.empty() && zk_log_pointer.empty() && @@ -933,7 +933,7 @@ void StorageReplicatedMergeTree::createReplica(const StorageMetadataPtr & metada zk_min_unprocessed_insert_time.empty() && zk_max_processed_insert_time.empty() && zk_mutation_pointer.empty() && - zk_creator_uuid == creator_uuid) + zk_creator_info == creator_info) { LOG_DEBUG(log, "Empty replica {} exists, will use it", replica_path); return; @@ -986,7 +986,7 @@ void StorageReplicatedMergeTree::createReplica(const StorageMetadataPtr & metada ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/mutation_pointer", "", zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/creator_uuid", creator_uuid, + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/creator_info", creator_info, zkutil::CreateMode::Persistent)); /// Check version of /replicas to see if there are any replicas created at the same moment of time. 
diff --git a/tests/queries/0_stateless/02888_replicated_merge_tree_creation_failure.sh b/tests/queries/0_stateless/02888_replicated_merge_tree_creation_failure.sh index 0c5705c7a35..34bddc04be1 100755 --- a/tests/queries/0_stateless/02888_replicated_merge_tree_creation_failure.sh +++ b/tests/queries/0_stateless/02888_replicated_merge_tree_creation_failure.sh @@ -11,12 +11,12 @@ ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS test_exception_replicated SYNC" #### 1 - There is only one replica -${CLICKHOUSE_CLIENT} --create_replicated_merge_tree_fault_injection=1 \ - -q "CREATE TABLE test_exception_replicated (date Date) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/recreate', 'r1') ORDER BY date" 2>&1 | grep -c "ABORT" +${CLICKHOUSE_CLIENT} --create_replicated_merge_tree_fault_injection_probability=1 \ + -q "CREATE TABLE test_exception_replicated (date Date) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/recreate', 'r1') ORDER BY date" 2>&1 | grep -c "Fault injected" -# We will see that the replica is empty and throw the same ABORT exception as before -${CLICKHOUSE_CLIENT} --create_replicated_merge_tree_fault_injection=1 \ - -q "CREATE TABLE test_exception_replicated (date Date) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/recreate', 'r1') ORDER BY date" 2>&1 | grep -c "ABORT" +# We will see that the replica is empty and throw the same 'Fault injected' exception as before +${CLICKHOUSE_CLIENT} --create_replicated_merge_tree_fault_injection_probability=1 \ + -q "CREATE TABLE test_exception_replicated (date Date) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/recreate', 'r1') ORDER BY date" 2>&1 | grep -c "Fault injected" # We will succeed ${CLICKHOUSE_CLIENT} \ @@ -26,10 +26,10 @@ ${CLICKHOUSE_CLIENT} -q "DROP TABLE test_exception_replicated SYNC" #### 2 - There are two replicas -${CLICKHOUSE_CLIENT} --create_replicated_merge_tree_fault_injection=1 \ - -q "CREATE TABLE test_exception_replicated (date Date) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/recreate', 'r1') ORDER BY date" 2>&1 | grep -c "ABORT" -${CLICKHOUSE_CLIENT} --create_replicated_merge_tree_fault_injection=1 \ - -q "CREATE TABLE test_exception_replicated (date Date) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/recreate', 'r2') ORDER BY date" 2>&1 | grep -c "ABORT" +${CLICKHOUSE_CLIENT} --create_replicated_merge_tree_fault_injection_probability=1 \ + -q "CREATE TABLE test_exception_replicated (date Date) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/recreate', 'r1') ORDER BY date" 2>&1 | grep -c "Fault injected" +${CLICKHOUSE_CLIENT} --create_replicated_merge_tree_fault_injection_probability=1 \ + -q "CREATE TABLE test_exception_replicated_2 (date Date) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/recreate', 'r2') ORDER BY date" 2>&1 | grep -c "Fault injected" # We will succeed ${CLICKHOUSE_CLIENT} \ From c899ff9da2a8d0b22eba33471cd9bbb183021e73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Thu, 2 Nov 2023 10:30:14 +0800 Subject: [PATCH 0157/1097] Apply suggestions from code review Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git 
a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 9501efbabb7..542d13d9363 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -779,9 +779,7 @@ static const orc::Type * traverseDownORCTypeByName( if (orc::STRUCT == orc_type->getKind()) { - auto next_type_and_target = search_struct_field(target, orc_type); - const auto & next_target = next_type_and_target.first; - const auto * next_orc_type = next_type_and_target.second; + const auto [next_target, next_orc_type]= search_struct_field(target, orc_type); return next_orc_type ? traverseDownORCTypeByName(next_target, next_orc_type, type, ignore_case) : nullptr; } else if (orc::LIST == orc_type->getKind()) From 424f1bec933c580d2dfa2101f58fd26004599139 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Thu, 2 Nov 2023 10:30:28 +0800 Subject: [PATCH 0158/1097] Apply suggestions from code review Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 542d13d9363..1cbb3b07c00 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -800,14 +800,9 @@ static const orc::Type * traverseDownORCTypeByName( type = array_type->getNestedType(); return traverseDownORCTypeByName(next_target, next_orc_type, type, ignore_case); } - else - return nullptr; } - else - return nullptr; } - else - return nullptr; + return nullptr; } static void updateIncludeTypeIds( From b142489c3c011a58e547bc4102fa79d452f4e712 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 2 Nov 2023 10:49:18 +0800 Subject: [PATCH 0159/1097] fix code style --- tests/performance/orc_filter_push_down.xml | 2 +- tests/performance/orc_tuple_field_prune.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/performance/orc_filter_push_down.xml b/tests/performance/orc_filter_push_down.xml index 9f49c20a075..318c6eca991 100644 --- a/tests/performance/orc_filter_push_down.xml +++ b/tests/performance/orc_filter_push_down.xml @@ -23,4 +23,4 @@ select a % 10, length(b) % 10, count(1) from test_orc_fpd where a in (9000000, 1000) group by a % 10, length(b) % 10 - \ No newline at end of file + diff --git a/tests/performance/orc_tuple_field_prune.xml b/tests/performance/orc_tuple_field_prune.xml index 4e338733329..b3064f35f39 100644 --- a/tests/performance/orc_tuple_field_prune.xml +++ b/tests/performance/orc_tuple_field_prune.xml @@ -14,4 +14,4 @@ select * from file('test_orc_tfp.orc', 'ORC', 'int64_column Int64, tuple_column Tuple(c Nullable(Int64))') format Null select * from file('test_orc_tfp.orc', 'ORC', 'int64_column Int64, array_tuple_column Array(Tuple(c Nullable(Int64)))') format Null select * from file('test_orc_tfp.orc', 'ORC', 'int64_column Int64, map_tuple_column Map(String, Tuple(c Nullable(Int64)))') format Null - \ No newline at end of file + From 6ccde98943b2746d2acb3f0cafb77163f56f089d Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 2 Nov 2023 11:01:50 +0800 Subject: [PATCH 0160/1097] change as request --- .../Impl/NativeORCBlockInputFormat.cpp | 40 ++++++++----------- 1 file 
changed, 17 insertions(+), 23 deletions(-) diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 1cbb3b07c00..c53d8c92b2a 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -720,12 +720,8 @@ static std::string toDotColumnPath(const std::vector & columns) } } -static void getFileReaderAndSchema( - ReadBuffer & in, - std::unique_ptr & file_reader, - Block & header, - const FormatSettings & format_settings, - std::atomic & is_stopped) +static void getFileReader( + ReadBuffer & in, std::unique_ptr & file_reader, const FormatSettings & format_settings, std::atomic & is_stopped) { if (is_stopped) return; @@ -733,18 +729,6 @@ static void getFileReaderAndSchema( orc::ReaderOptions options; auto input_stream = asORCInputStream(in, format_settings, is_stopped); file_reader = orc::createReader(std::move(input_stream), options); - const auto & schema = file_reader->getType(); - - for (size_t i = 0; i < schema.getSubtypeCount(); ++i) - { - const std::string & name = schema.getFieldName(i); - const orc::Type * orc_type = schema.getSubtype(i); - - bool skipped = false; - DataTypePtr type = parseORCType(orc_type, format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference, skipped); - if (!skipped) - header.insert(ColumnWithTypeAndName{type, name}); - } } static const orc::Type * traverseDownORCTypeByName( @@ -884,8 +868,7 @@ NativeORCBlockInputFormat::NativeORCBlockInputFormat(ReadBuffer & in_, Block hea void NativeORCBlockInputFormat::prepareFileReader() { - Block schema; - getFileReaderAndSchema(*in, file_reader, schema, format_settings, is_stopped); + getFileReader(*in, file_reader, format_settings, is_stopped); if (is_stopped) return; @@ -1037,17 +1020,28 @@ NativeORCSchemaReader::NativeORCSchemaReader(ReadBuffer & in_, const FormatSetti NamesAndTypesList NativeORCSchemaReader::readSchema() { - Block header; std::unique_ptr file_reader; std::atomic is_stopped = 0; - getFileReaderAndSchema(in, file_reader, header, format_settings, is_stopped); + getFileReader(in, file_reader, format_settings, is_stopped); + + const auto & schema = file_reader->getType(); + Block header; + for (size_t i = 0; i < schema.getSubtypeCount(); ++i) + { + const std::string & name = schema.getFieldName(i); + const orc::Type * orc_type = schema.getSubtype(i); + + bool skipped = false; + DataTypePtr type = parseORCType(orc_type, format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference, skipped); + if (!skipped) + header.insert(ColumnWithTypeAndName{type, name}); + } if (format_settings.schema_inference_make_columns_nullable) return getNamesAndRecursivelyNullableTypes(header); return header.getNamesAndTypesList(); } - ORCColumnToCHColumn::ORCColumnToCHColumn( const Block & header_, bool allow_missing_columns_, bool null_as_default_, bool case_insensitive_matching_) : header(header_) From 59d11559e102dde49840eb95d9ed7d3470647b56 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 2 Nov 2023 04:42:33 +0000 Subject: [PATCH 0161/1097] Fix tests --- .../0_stateless/02221_system_zookeeper_unrestricted.reference | 2 ++ .../02221_system_zookeeper_unrestricted_like.reference | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference index 53b44764d5c..137fb0587cc 100644 --- 
a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference +++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference @@ -16,6 +16,8 @@ columns columns columns columns +creator_info +creator_info failed_parts failed_parts flags diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference index ccc3064ccbd..2893c2a845f 100644 --- a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference +++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference @@ -7,6 +7,7 @@ block_numbers blocks columns columns +creator_info failed_parts flags host @@ -49,6 +50,7 @@ block_numbers blocks columns columns +creator_info failed_parts flags host From b8665a610cb7d8920a24a1e3753c785bf42f46a3 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 2 Nov 2023 15:27:48 +0800 Subject: [PATCH 0162/1097] fix failed perf test --- tests/performance/orc_tuple_field_prune.xml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/performance/orc_tuple_field_prune.xml b/tests/performance/orc_tuple_field_prune.xml index b3064f35f39..d95787af93b 100644 --- a/tests/performance/orc_tuple_field_prune.xml +++ b/tests/performance/orc_tuple_field_prune.xml @@ -2,16 +2,15 @@ 1 10000 - 0 - insert into function file('test_orc_tfp.orc', 'ORC') select * from generateRandom('int64_column Nullable(Int64), tuple_column Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)), array_tuple_column Array(Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64))), map_tuple_column Map(String, Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)))') limit 1000000 + insert into function file('test_orc_tfp.orc', 'ORC') select * from generateRandom('tuple_column Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)), array_tuple_column Nested(a Nullable(String), b Nullable(Float64), c Nullable(Int64)), map_tuple_column Map(String, Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)))') limit 1000000 DROP TABLE IF EXISTS test_orc_tfp - select * from file('test_orc_tfp.orc', 'ORC', 'int64_column Int64, tuple_column Tuple(c Nullable(Int64))') format Null - select * from file('test_orc_tfp.orc', 'ORC', 'int64_column Int64, array_tuple_column Array(Tuple(c Nullable(Int64)))') format Null - select * from file('test_orc_tfp.orc', 'ORC', 'int64_column Int64, map_tuple_column Map(String, Tuple(c Nullable(Int64)))') format Null + select * from file('test_orc_tfp.orc', 'ORC', 'tuple_column Tuple(c Nullable(Int64))') format Null + select * from file('test_orc_tfp.orc', 'ORC', 'array_tuple_column Nested(c Nullable(Int64))') format Null + select * from file('test_orc_tfp.orc', 'ORC', 'map_tuple_column Map(String, Tuple(c Nullable(Int64)))') format Null From b88048e6d9a5ce72475433ac709a01a4ae515cec Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 2 Nov 2023 16:51:50 +0000 Subject: [PATCH 0163/1097] Fix style --- ...e.reference => 02888_replicated_merge_tree_creation.reference} | 0 ...reation_failure.sh => 02888_replicated_merge_tree_creation.sh} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{02888_replicated_merge_tree_creation_failure.reference => 02888_replicated_merge_tree_creation.reference} (100%) rename tests/queries/0_stateless/{02888_replicated_merge_tree_creation_failure.sh => 02888_replicated_merge_tree_creation.sh} (100%) 
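The orc_tuple_field_prune performance test above reads only a single sub-field out of ORC struct columns, which is exactly what the tuple field pruning in NativeORCBlockInputFormat is meant to exploit: only the column ids of struct fields that appear in the requested ClickHouse Tuple are handed to the ORC reader. A minimal sketch of that matching step, using simplified stand-in types rather than the real orc::Type and DataTypeTuple interfaces:

    #include <algorithm>
    #include <cctype>
    #include <string>
    #include <unordered_set>
    #include <vector>

    /// Simplified stand-ins: the real code walks orc::Type and DB::DataTypeTuple instead.
    struct OrcNode  { std::string name; unsigned column_id = 0; std::vector<OrcNode> children; };
    struct WantNode { std::string name; std::vector<WantNode> children; };

    static std::string lowered(std::string s)
    {
        std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return char(std::tolower(c)); });
        return s;
    }

    /// Collect the ORC column ids of exactly those struct fields the query asked for,
    /// so the reader can skip decoding everything else.
    void collectIncludedIds(const WantNode & wanted, const OrcNode & orc, bool ignore_case, std::unordered_set<unsigned> & ids)
    {
        if (orc.children.empty())            /// leaf column: include it as a whole
        {
            ids.insert(orc.column_id);
            return;
        }
        for (const auto & want_child : wanted.children)
            for (const auto & orc_child : orc.children)
                if (ignore_case ? lowered(orc_child.name) == lowered(want_child.name)
                                : orc_child.name == want_child.name)
                    collectIncludedIds(want_child, orc_child, ignore_case, ids);
    }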
diff --git a/tests/queries/0_stateless/02888_replicated_merge_tree_creation_failure.reference b/tests/queries/0_stateless/02888_replicated_merge_tree_creation.reference similarity index 100% rename from tests/queries/0_stateless/02888_replicated_merge_tree_creation_failure.reference rename to tests/queries/0_stateless/02888_replicated_merge_tree_creation.reference diff --git a/tests/queries/0_stateless/02888_replicated_merge_tree_creation_failure.sh b/tests/queries/0_stateless/02888_replicated_merge_tree_creation.sh similarity index 100% rename from tests/queries/0_stateless/02888_replicated_merge_tree_creation_failure.sh rename to tests/queries/0_stateless/02888_replicated_merge_tree_creation.sh From e1ec380339a34529c575e74b991f49a411d0bbb2 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 2 Nov 2023 17:39:41 +0000 Subject: [PATCH 0164/1097] Add test --- .../02910_bad_logs_level_in_local.reference | 0 .../0_stateless/02910_bad_logs_level_in_local.sh | 15 +++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 tests/queries/0_stateless/02910_bad_logs_level_in_local.reference create mode 100755 tests/queries/0_stateless/02910_bad_logs_level_in_local.sh diff --git a/tests/queries/0_stateless/02910_bad_logs_level_in_local.reference b/tests/queries/0_stateless/02910_bad_logs_level_in_local.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02910_bad_logs_level_in_local.sh b/tests/queries/0_stateless/02910_bad_logs_level_in_local.sh new file mode 100755 index 00000000000..badf7232a95 --- /dev/null +++ b/tests/queries/0_stateless/02910_bad_logs_level_in_local.sh @@ -0,0 +1,15 @@ +#!/usr/bin/expect -f + +log_user 0 +set timeout 60 +match_max 100000 + +spawn bash -c "clickhouse-local" + +expect ":) " +send -- "SET send_logs_level = 't'\r" +expect "Exception on client:" +expect ":) " +send -- "exit\r" +expect eof + From 1078047fb59aff24306c26952026278863833721 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 24 Oct 2023 13:44:00 +0000 Subject: [PATCH 0165/1097] Fix 'Cannot read from file:' while running client in a background --- src/Client/ClientBase.cpp | 22 +++++++++++++++++-- .../02903_client_insert_in_background.sh | 13 +++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) create mode 100755 tests/queries/0_stateless/02903_client_insert_in_background.sh diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 9c7bfe5974f..76f2060da93 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -108,6 +108,7 @@ namespace ErrorCodes extern const int FILE_ALREADY_EXISTS; extern const int USER_SESSION_LIMIT_EXCEEDED; extern const int NOT_IMPLEMENTED; + extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; } } @@ -1443,6 +1444,23 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars } } +namespace +{ + bool isStdinNotEmptyAndValid(ReadBufferFromFileDescriptor & std_in) + { + try + { + return !std_in.eof(); + } + catch (const Exception & e) + { + if (e.code() == ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR) + return false; + throw; + } + } +} + void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_description, ASTPtr parsed_query) { @@ -1460,7 +1478,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des if (!parsed_insert_query) return; - bool have_data_in_stdin = !is_interactive && !stdin_is_a_tty && !std_in.eof(); + bool have_data_in_stdin = !is_interactive && !stdin_is_a_tty && isStdinNotEmptyAndValid(std_in); if 
(need_render_progress) { @@ -1851,7 +1869,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (is_async_insert_with_inlined_data) { - bool have_data_in_stdin = !is_interactive && !stdin_is_a_tty && !std_in.eof(); + bool have_data_in_stdin = !is_interactive && !stdin_is_a_tty && isStdinNotEmptyAndValid(std_in); bool have_external_data = have_data_in_stdin || insert->infile; if (have_external_data) diff --git a/tests/queries/0_stateless/02903_client_insert_in_background.sh b/tests/queries/0_stateless/02903_client_insert_in_background.sh new file mode 100755 index 00000000000..d5fc56752f6 --- /dev/null +++ b/tests/queries/0_stateless/02903_client_insert_in_background.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test" +$CLICKHOUSE_CLIENT -q "create table test (x UInt64) engine=Memory" +nohup $CLICKHOUSE_CLIENT -q "insert into test values (42)" 2> $CLICKHOUSE_TEST_UNIQUE_NAME.out +tail -n +2 $CLICKHOUSE_TEST_UNIQUE_NAME.out +$CLICKHOUSE_CLIENT -q "drop table test" +rm $CLICKHOUSE_TEST_UNIQUE_NAME.out + From b306fdb11d01d3fd1d4c38d5a8facafb77052ecf Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 30 Oct 2023 10:25:58 +0000 Subject: [PATCH 0166/1097] Add refernce file --- .../0_stateless/02903_client_insert_in_background.reference | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/queries/0_stateless/02903_client_insert_in_background.reference diff --git a/tests/queries/0_stateless/02903_client_insert_in_background.reference b/tests/queries/0_stateless/02903_client_insert_in_background.reference new file mode 100644 index 00000000000..e69de29bb2d From 7c69dee5cd535ae64062207283b41124270ffee1 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 2 Nov 2023 18:05:02 +0000 Subject: [PATCH 0167/1097] Fix review comment --- src/Client/ClientBase.cpp | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 76f2060da93..d3cb828e8f7 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1385,6 +1385,23 @@ void ClientBase::addMultiquery(std::string_view query, Arguments & common_argume common_arguments.emplace_back(query); } +namespace +{ +bool isStdinNotEmptyAndValid(ReadBufferFromFileDescriptor & std_in) +{ + try + { + return !std_in.eof(); + } + catch (const Exception & e) + { + if (e.code() == ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR) + return false; + throw; + } +} +} + void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr parsed_query) { @@ -1404,7 +1421,7 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars /// Process the query that requires transferring data blocks to the server. 
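/// Why the guarded stdin check matters: when the client is started in the background
/// (for example under nohup, as in the test above), reading the inherited terminal stdin
/// can fail with a read error instead of cleanly reporting end-of-file, so a bare
/// std_in.eof() throws CANNOT_READ_FROM_FILE_DESCRIPTOR. isStdinNotEmptyAndValid()
/// converts exactly that error into "there is no usable data on stdin".
/// The same idea with plain POSIX calls; illustrative only, not ClickHouse API:

#include <unistd.h>

bool stdinLooksReadable()
{
    char byte;
    ssize_t n = ::read(STDIN_FILENO, &byte, 1);  /// illustration only: a real check must not discard the byte
    return n > 0;                                /// 0 = EOF, -1 = read error (e.g. EIO): both mean "nothing to send"
}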
const auto & parsed_insert_query = parsed_query->as(); - if ((!parsed_insert_query.data && !parsed_insert_query.infile) && (is_interactive || (!stdin_is_a_tty && std_in.eof()))) + if ((!parsed_insert_query.data && !parsed_insert_query.infile) && (is_interactive || (!stdin_is_a_tty && !isStdinNotEmptyAndValid(std_in)))) { const auto & settings = global_context->getSettingsRef(); if (settings.throw_if_no_data_to_insert) @@ -1444,23 +1461,6 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars } } -namespace -{ - bool isStdinNotEmptyAndValid(ReadBufferFromFileDescriptor & std_in) - { - try - { - return !std_in.eof(); - } - catch (const Exception & e) - { - if (e.code() == ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR) - return false; - throw; - } - } -} - void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_description, ASTPtr parsed_query) { From 4b7146d47ed2894068ba6972bdf077df1625e525 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 3 Nov 2023 16:03:26 +0800 Subject: [PATCH 0168/1097] remove useless codes --- .../Impl/NativeORCBlockInputFormat.cpp | 63 ------------------- 1 file changed, 63 deletions(-) diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index c53d8c92b2a..49379405c26 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -678,48 +678,6 @@ buildORCSearchArgument(const KeyCondition & key_condition, const Block & header, return builder->build(); } - -static std::string toDotColumnPath(const std::vector & columns) -{ - if (columns.empty()) - return {}; - - std::ostringstream column_stream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - std::copy(columns.begin(), columns.end(), std::ostream_iterator(column_stream, ".")); - std::string column_path = column_stream.str(); - return column_path.substr(0, column_path.length() - 1); -} - -[[maybe_unused]] static void buildORCTypeNameIdMap( - const orc::Type * orc_type, - std::vector & columns, - bool ignore_case, - std::map & id_type_map, - std::map & name_id_map) -{ - id_type_map[orc_type->getColumnId()] = orc_type; - if (orc::STRUCT == orc_type->getKind()) - { - for (size_t i = 0; i < orc_type->getSubtypeCount(); ++i) - { - const std::string & field_name = orc_type->getFieldName(i); - columns.push_back(field_name); - auto column_path = toDotColumnPath(columns); - if (ignore_case) - boost::to_lower(column_path); - name_id_map[column_path] = orc_type->getSubtype(i)->getColumnId(); - buildORCTypeNameIdMap(orc_type->getSubtype(i), columns, ignore_case, id_type_map, name_id_map); - columns.pop_back(); - } - } - else - { - // other non-primitive type - for (size_t j = 0; j < orc_type->getSubtypeCount(); ++j) - buildORCTypeNameIdMap(orc_type->getSubtype(j), columns, ignore_case, id_type_map, name_id_map); - } -} - static void getFileReader( ReadBuffer & in, std::unique_ptr & file_reader, const FormatSettings & format_settings, std::atomic & is_stopped) { @@ -737,7 +695,6 @@ static const orc::Type * traverseDownORCTypeByName( DataTypePtr & type, bool ignore_case) { - // std::cout << "target:" << target << ", orc_type:" << orc_type->toString() << ", type:" << type->getName() << std::endl; if (target.empty()) return orc_type; @@ -792,8 +749,6 @@ static const orc::Type * traverseDownORCTypeByName( static void updateIncludeTypeIds( DataTypePtr type, const orc::Type * orc_type, bool ignore_case, std::unordered_set & 
include_typeids) { - // std::cout << "ch type:" << type->getName() << ", orc_type:" << orc_type->toString() << std::endl; - /// For primitive types, directly append column id into result if (orc_type->getSubtypeCount() == 0) { @@ -894,24 +849,6 @@ void NativeORCBlockInputFormat::prepareFileReader() } include_indices.assign(include_typeids.begin(), include_typeids.end()); - /// Just for Debug - // std::vector tmp; - // std::map id_type_map; - // std::map name_id_map; - // buildORCTypeNameIdMap(&file_schema, tmp, ignore_case, id_type_map, name_id_map); - // std::cout << "just for debug:" << std::endl; - // std::cout << "subtypes:" << file_reader->getType().getSubtypeCount() << std::endl; - // std::cout << "ch output type:" << getPort().getHeader().dumpStructure() << std::endl; - // std::cout << "orc output type:" << file_reader->getType().toString() << std::endl; - // std::cout << "id type map" << std::endl; - // for (const auto & [k, v] : id_type_map) - // std::cout << "id:" << k << ", type:" << v->toString() << std::endl; - // std::cout << "name id map" << std::endl; - // for (const auto & [k, v] : name_id_map) - // std::cout << "name:" << k << ", id:" << v << std::endl; - // for (const auto & x : include_indices) - // std::cout << "choose " << x << std::endl; - if (format_settings.orc.filter_push_down && key_condition && !sarg) { sarg = buildORCSearchArgument(*key_condition, getPort().getHeader(), file_reader->getType(), format_settings); From 15cb8d1b8975464f7f440e116fb99a48ca0c4865 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 3 Nov 2023 13:16:08 +0000 Subject: [PATCH 0169/1097] Fix use-of-unitialized-value --- contrib/arrow | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/arrow b/contrib/arrow index 8cdbf43f78a..ba5c67934e8 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 8cdbf43f78ad02615aef29dc7f9af0dea22a03e4 +Subproject commit ba5c67934e8274d649befcffab56731632dc5253 From 4235c690d40a082139cf13c30670f083e5c1dea1 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 3 Nov 2023 15:14:25 +0000 Subject: [PATCH 0170/1097] init commit --- src/Processors/Formats/Impl/NpyRowInputFormat.cpp | 12 +++++++++++- src/Processors/Formats/Impl/NpyRowInputFormat.h | 5 +++++ .../0_stateless/02908_Npy_files_caching.reference | 4 ++++ tests/queries/0_stateless/02908_Npy_files_caching.sh | 12 ++++++++++++ 4 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02908_Npy_files_caching.reference create mode 100755 tests/queries/0_stateless/02908_Npy_files_caching.sh diff --git a/src/Processors/Formats/Impl/NpyRowInputFormat.cpp b/src/Processors/Formats/Impl/NpyRowInputFormat.cpp index 9acb2909626..bd8b442f180 100644 --- a/src/Processors/Formats/Impl/NpyRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/NpyRowInputFormat.cpp @@ -265,6 +265,11 @@ NpyRowInputFormat::NpyRowInputFormat(ReadBuffer & in_, Block header_, Params par nested_type = getNestedType(types[0]); } +size_t NpyRowInputFormat::countRows(size_t max_block_size) +{ + return int(max_block_size) > header.shape[0] ? 
header.shape[0] : max_block_size; +} + template void NpyRowInputFormat::readBinaryValueAndInsert(MutableColumnPtr column, NumpyDataType::Endianness endianness) { @@ -395,13 +400,18 @@ NpySchemaReader::NpySchemaReader(ReadBuffer & in_) NamesAndTypesList NpySchemaReader::readSchema() { - NumpyHeader header = parseHeader(in); + header = parseHeader(in); DataTypePtr nested_type = getDataTypeFromNumpyType(header.numpy_type); DataTypePtr result_type = createNestedArrayType(nested_type, header.shape.size()); return {{"array", result_type}}; } +std::optional NpySchemaReader::readNumberOrRows() +{ + return header.shape[0]; +} + void registerInputFormatNpy(FormatFactory & factory) { factory.registerInputFormat("Npy", []( diff --git a/src/Processors/Formats/Impl/NpyRowInputFormat.h b/src/Processors/Formats/Impl/NpyRowInputFormat.h index ad32bdba3bf..8d228a3593a 100644 --- a/src/Processors/Formats/Impl/NpyRowInputFormat.h +++ b/src/Processors/Formats/Impl/NpyRowInputFormat.h @@ -29,6 +29,9 @@ public: String getName() const override { return "NpyRowInputFormat"; } private: + bool supportsCountRows() const override { return true; } + size_t countRows(size_t max_block_size) override; + void readPrefix() override; bool readRow(MutableColumns & columns, RowReadExtension &) override; void readData(MutableColumns & columns); @@ -59,7 +62,9 @@ public: explicit NpySchemaReader(ReadBuffer & in_); private: + std::optional readNumberOrRows() override; NamesAndTypesList readSchema() override; + NumpyHeader header; }; } diff --git a/tests/queries/0_stateless/02908_Npy_files_caching.reference b/tests/queries/0_stateless/02908_Npy_files_caching.reference new file mode 100644 index 00000000000..75baea619a9 --- /dev/null +++ b/tests/queries/0_stateless/02908_Npy_files_caching.reference @@ -0,0 +1,4 @@ +3 +3 +array Int64 +3 diff --git a/tests/queries/0_stateless/02908_Npy_files_caching.sh b/tests/queries/0_stateless/02908_Npy_files_caching.sh new file mode 100755 index 00000000000..1c27d35b1c8 --- /dev/null +++ b/tests/queries/0_stateless/02908_Npy_files_caching.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/one_dim.npy') settings optimize_count_from_files=0" +$CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/one_dim.npy') settings optimize_count_from_files=1" +$CLICKHOUSE_LOCAL -nm -q " +desc file('$CURDIR/data_npy/one_dim.npy'); +select number_of_rows from system.schema_inference_cache where format='Npy'; +" From 6d9b517b194a83c17623b1f018b7c90b863d80a2 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 3 Nov 2023 17:21:37 +0000 Subject: [PATCH 0171/1097] fix suggestions --- src/Processors/Formats/Impl/NpyRowInputFormat.cpp | 8 +++++++- src/Processors/Formats/Impl/NpyRowInputFormat.h | 1 + .../queries/0_stateless/02908_Npy_files_caching.reference | 1 + tests/queries/0_stateless/02908_Npy_files_caching.sh | 1 + 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/NpyRowInputFormat.cpp b/src/Processors/Formats/Impl/NpyRowInputFormat.cpp index bd8b442f180..7b0c8e275a9 100644 --- a/src/Processors/Formats/Impl/NpyRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/NpyRowInputFormat.cpp @@ -267,7 +267,13 @@ NpyRowInputFormat::NpyRowInputFormat(ReadBuffer & in_, Block header_, Params par size_t NpyRowInputFormat::countRows(size_t max_block_size) { - return int(max_block_size) > header.shape[0] ? header.shape[0] : max_block_size; + size_t count; + if (counted_rows + max_block_size <= size_t(header.shape[0])) + count = max_block_size; + else + count = header.shape[0] - counted_rows; + counted_rows += count; + return count; } template diff --git a/src/Processors/Formats/Impl/NpyRowInputFormat.h b/src/Processors/Formats/Impl/NpyRowInputFormat.h index 8d228a3593a..faa9dbb2ec3 100644 --- a/src/Processors/Formats/Impl/NpyRowInputFormat.h +++ b/src/Processors/Formats/Impl/NpyRowInputFormat.h @@ -54,6 +54,7 @@ private: DataTypePtr nested_type; NumpyHeader header; + size_t counted_rows = 0; }; class NpySchemaReader : public ISchemaReader diff --git a/tests/queries/0_stateless/02908_Npy_files_caching.reference b/tests/queries/0_stateless/02908_Npy_files_caching.reference index 75baea619a9..6159dd69b57 100644 --- a/tests/queries/0_stateless/02908_Npy_files_caching.reference +++ b/tests/queries/0_stateless/02908_Npy_files_caching.reference @@ -1,4 +1,5 @@ 3 3 +3 array Int64 3 diff --git a/tests/queries/0_stateless/02908_Npy_files_caching.sh b/tests/queries/0_stateless/02908_Npy_files_caching.sh index 1c27d35b1c8..f77351f1180 100755 --- a/tests/queries/0_stateless/02908_Npy_files_caching.sh +++ b/tests/queries/0_stateless/02908_Npy_files_caching.sh @@ -6,6 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/one_dim.npy') settings optimize_count_from_files=0" $CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/one_dim.npy') settings optimize_count_from_files=1" +$CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/one_dim.npy', auto, 'array Int64') settings optimize_count_from_files=1" $CLICKHOUSE_LOCAL -nm -q " desc file('$CURDIR/data_npy/one_dim.npy'); select number_of_rows from system.schema_inference_cache where format='Npy'; From 18484373e36e3b994fefbb31b09e77da67db57ed Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 3 Nov 2023 18:20:04 +0000 Subject: [PATCH 0172/1097] Make test non-dependable on max_threads --- tests/queries/0_stateless/02884_parallel_window_functions.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/queries/0_stateless/02884_parallel_window_functions.sql b/tests/queries/0_stateless/02884_parallel_window_functions.sql index 5e71fadb3ff..383c48e8b39 100644 --- a/tests/queries/0_stateless/02884_parallel_window_functions.sql +++ b/tests/queries/0_stateless/02884_parallel_window_functions.sql @@ -23,7 +23,7 @@ FROM ) GROUP BY nw ORDER BY R DESC -LIMIT 10) where explain ilike '%ScatterByPartitionTransform%'; +LIMIT 10) where explain ilike '%ScatterByPartitionTransform%' SETTINGS max_threads = 4; -- { echoOn } From 4d7b957e8a7cdccc528ffa2d1937aafcb44bd42e Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 3 Nov 2023 18:31:56 +0000 Subject: [PATCH 0173/1097] Make test non-flaky --- .../0_stateless/02884_parallel_window_functions.reference | 6 +++--- .../0_stateless/02884_parallel_window_functions.sql | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.reference b/tests/queries/0_stateless/02884_parallel_window_functions.reference index cab6195b625..bac15838dc2 100644 --- a/tests/queries/0_stateless/02884_parallel_window_functions.reference +++ b/tests/queries/0_stateless/02884_parallel_window_functions.reference @@ -16,7 +16,7 @@ FROM GROUP BY ac, nw ) GROUP BY nw -ORDER BY R DESC +ORDER BY nw ASC, R DESC LIMIT 10; 0 2 0 1 2 0 @@ -36,7 +36,7 @@ FROM GROUP BY ac, nw ) GROUP BY nw -ORDER BY R DESC +ORDER BY nw ASC, R DESC LIMIT 10 SETTINGS max_threads = 1; 0 2 0 @@ -93,7 +93,7 @@ FROM nw ) GROUP BY nw -ORDER BY R DESC +ORDER BY nw ASC, R DESC LIMIT 10; 0 2 0 1 2 0 diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.sql b/tests/queries/0_stateless/02884_parallel_window_functions.sql index 383c48e8b39..3151b42f896 100644 --- a/tests/queries/0_stateless/02884_parallel_window_functions.sql +++ b/tests/queries/0_stateless/02884_parallel_window_functions.sql @@ -22,7 +22,7 @@ FROM GROUP BY ac, nw ) GROUP BY nw -ORDER BY R DESC +ORDER BY nw ASC, R DESC LIMIT 10) where explain ilike '%ScatterByPartitionTransform%' SETTINGS max_threads = 4; -- { echoOn } @@ -42,7 +42,7 @@ FROM GROUP BY ac, nw ) GROUP BY nw -ORDER BY R DESC +ORDER BY nw ASC, R DESC LIMIT 10; SELECT @@ -60,7 +60,7 @@ FROM GROUP BY ac, nw ) GROUP BY nw -ORDER BY R DESC +ORDER BY nw ASC, R DESC LIMIT 10 SETTINGS max_threads = 1; @@ -115,5 +115,5 @@ FROM nw ) GROUP BY nw -ORDER BY R DESC +ORDER BY nw ASC, R DESC LIMIT 10; From 8a6ae6e150271d2a421f7fd3aa95f232d447b582 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sat, 4 Nov 2023 09:41:01 +0800 Subject: [PATCH 0174/1097] change as request --- .../Impl/NativeORCBlockInputFormat.cpp | 37 ++++++++++++------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 49379405c26..60d43cc049a 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -783,22 +783,31 @@ static void updateIncludeTypeIds( { if (tuple_type->haveExplicitNames()) { - const auto & names = tuple_type->getElementNames(); - for (size_t tuple_i = 0; tuple_i < names.size(); ++tuple_i) + std::unordered_map orc_field_name_to_index; + orc_field_name_to_index.reserve(orc_type->getSubtypeCount()); + for (size_t struct_i = 0; struct_i < orc_type->getSubtypeCount(); ++struct_i) { - const auto & name = names[tuple_i]; - for (size_t struct_i = 0; struct_i < orc_type->getSubtypeCount(); ++struct_i) + 
String field_name = orc_type->getFieldName(struct_i); + if (ignore_case) + boost::to_lower(field_name); + + orc_field_name_to_index[field_name] = struct_i; + } + + const auto & element_names = tuple_type->getElementNames(); + for (size_t tuple_i = 0; tuple_i < element_names.size(); ++tuple_i) + { + String element_name = element_names[tuple_i]; + if (ignore_case) + boost::to_lower(element_name); + + if (orc_field_name_to_index.contains(element_name)) { - if (boost::equals(orc_type->getFieldName(struct_i), name) - || (ignore_case && boost::iequals(orc_type->getFieldName(struct_i), name))) - { - updateIncludeTypeIds( - tuple_type->getElement(tuple_i), - orc_type->getSubtype(struct_i), - ignore_case, - include_typeids); - break; - } + updateIncludeTypeIds( + tuple_type->getElement(tuple_i), + orc_type->getSubtype(orc_field_name_to_index[element_name]), + ignore_case, + include_typeids); } } } From 8e23dd909024ba8109b41b8d54d6e355f11ea0ff Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sat, 4 Nov 2023 10:08:26 +0800 Subject: [PATCH 0175/1097] add some comments --- src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 60d43cc049a..3be4b20524f 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -778,6 +778,9 @@ static void updateIncludeTypeIds( return; } case orc::STRUCT: { + /// To make sure tuple field pruning work fine, we should include only the fields of orc struct type which are also contained in CH tuple types, instead of all fields of orc struct type. + /// For example, CH tupe type in header is "x Tuple(a String)", ORC struct type is "x struct", then only type id of field "x.a" should be included. + /// For tuple field pruning purpose, we should never include "x.b" for it is not required in format header. const auto * tuple_type = typeid_cast(non_nullable_type.get()); if (tuple_type) { From 83689c2a04b60288cbeda25d2c57762180273c29 Mon Sep 17 00:00:00 2001 From: flynn Date: Sat, 4 Nov 2023 14:35:39 +0000 Subject: [PATCH 0176/1097] Support create and materialized index in the same alter query --- src/Interpreters/InterpreterAlterQuery.cpp | 36 +++++++++++-------- ..._add_index_and_materialize_index.reference | 0 .../02911_add_index_and_materialize_index.sql | 16 +++++++++ 3 files changed, 38 insertions(+), 14 deletions(-) create mode 100644 tests/queries/0_stateless/02911_add_index_and_materialize_index.reference create mode 100644 tests/queries/0_stateless/02911_add_index_and_materialize_index.sql diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index f851607000c..c9a1bd17a46 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -128,10 +128,6 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) } else if (auto mut_command = MutationCommand::parse(command_ast)) { - if (mut_command->type == MutationCommand::MATERIALIZE_TTL && !metadata_snapshot->hasAnyTTL()) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot MATERIALIZE TTL as there is no TTL set for table {}", - table->getStorageID().getNameForLogs()); - if (mut_command->type == MutationCommand::UPDATE || mut_command->type == MutationCommand::DELETE) { /// TODO: add a check for result query size. 
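/// The hunk below is an ordering change inside InterpreterAlterQuery::executeToTable():
/// metadata ALTER commands are applied first, the in-memory metadata snapshot is re-read,
/// and only then are mutation commands validated against it. That is what lets a single
/// query such as
///     ALTER TABLE t ADD INDEX i ..., MATERIALIZE INDEX i;
/// materialize an index created in the same statement. A self-contained toy of the same
/// ordering rule (names are illustrative, not the real interpreter interface):

#include <set>
#include <stdexcept>
#include <string>

struct ToyMetadata { std::set<std::string> indexes; };

void alterThenMaterialize(ToyMetadata & stored, const std::string & index_name)
{
    stored.indexes.insert(index_name);               /// 1. apply the metadata ALTER (ADD INDEX)
    ToyMetadata snapshot = stored;                   /// 2. take the snapshot only after the ALTER
    if (!snapshot.indexes.count(index_name))         /// 3. validate the mutation (MATERIALIZE INDEX)
        throw std::runtime_error("Unknown index: " + index_name);
}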
@@ -162,8 +158,30 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) "to execute ALTERs of different types (replicated and non replicated) in single query"); } + if (!alter_commands.empty()) + { + auto alter_lock = table->lockForAlter(getContext()->getSettingsRef().lock_acquire_timeout); + StorageInMemoryMetadata metadata = table->getInMemoryMetadata(); + alter_commands.validate(table, getContext()); + alter_commands.prepare(metadata); + table->checkAlterIsPossible(alter_commands, getContext()); + table->alter(alter_commands, getContext(), alter_lock); + } + + /// Get newest metadata_snapshot after execute ALTER command, in order to + /// support like materialize index in the same ALTER query that creates it. + metadata_snapshot = table->getInMemoryMetadataPtr(); + if (mutation_commands.hasNonEmptyMutationCommands()) { + for (const auto & command : mutation_commands) + { + /// Check it after alter finished, so we can add TTL and materialize TTL in the same ALTER query. + if (command.type == MutationCommand::MATERIALIZE_TTL && !metadata_snapshot->hasAnyTTL()) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot MATERIALIZE TTL as there is no TTL set for table {}", + table->getStorageID().getNameForLogs()); + + } table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); MutationsInterpreter::Settings settings(false); MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate(); @@ -178,16 +196,6 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) res.pipeline = QueryPipeline(std::move(partition_commands_pipe)); } - if (!alter_commands.empty()) - { - auto alter_lock = table->lockForAlter(getContext()->getSettingsRef().lock_acquire_timeout); - StorageInMemoryMetadata metadata = table->getInMemoryMetadata(); - alter_commands.validate(table, getContext()); - alter_commands.prepare(metadata); - table->checkAlterIsPossible(alter_commands, getContext()); - table->alter(alter_commands, getContext(), alter_lock); - } - return res; } diff --git a/tests/queries/0_stateless/02911_add_index_and_materialize_index.reference b/tests/queries/0_stateless/02911_add_index_and_materialize_index.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02911_add_index_and_materialize_index.sql b/tests/queries/0_stateless/02911_add_index_and_materialize_index.sql new file mode 100644 index 00000000000..57b144a3a8d --- /dev/null +++ b/tests/queries/0_stateless/02911_add_index_and_materialize_index.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS index_test; + +CREATE TABLE index_test +( + x UInt32, + y UInt32, + z UInt32 +) ENGINE = MergeTree order by x; + +ALTER TABLE index_test + ADD INDEX i_x mortonDecode(2, z).1 TYPE minmax GRANULARITY 1, + ADD INDEX i_y mortonDecode(2, z).2 TYPE minmax GRANULARITY 1, + MATERIALIZE INDEX i_x, + MATERIALIZE INDEX i_y; + +drop table index_test; From d8b44dadd5c28a16e627c4c815996af75c37036e Mon Sep 17 00:00:00 2001 From: flynn Date: Sat, 4 Nov 2023 16:16:55 +0000 Subject: [PATCH 0177/1097] update test --- .../0_stateless/02911_add_index_and_materialize_index.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02911_add_index_and_materialize_index.sql b/tests/queries/0_stateless/02911_add_index_and_materialize_index.sql index 57b144a3a8d..f8785ec9a38 100644 --- a/tests/queries/0_stateless/02911_add_index_and_materialize_index.sql +++ 
b/tests/queries/0_stateless/02911_add_index_and_materialize_index.sql @@ -1,3 +1,5 @@ +-- Tags: no-replicated-database + DROP TABLE IF EXISTS index_test; CREATE TABLE index_test From f5e439d9dfe32a679f84a6720b461c5a3c5e3a4a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 4 Nov 2023 21:53:05 +0100 Subject: [PATCH 0178/1097] Add an option to enable or disable coverage collection in clickhouse-test --- tests/clickhouse-test | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 36846a4aeb1..debbb5116da 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1174,7 +1174,7 @@ class TestCase: description_full += result.description - if BuildFlags.SANITIZE_COVERAGE in args.build_flags: + if args.collect_per_test_coverage and BuildFlags.SANITIZE_COVERAGE in args.build_flags: clickhouse_execute( args, f"INSERT INTO system.coverage SELECT now(), '{self.case}', coverage()", @@ -1248,7 +1248,7 @@ class TestCase: ) # We want to calculate per-test code coverage. That's why we reset it before each test. - if BuildFlags.SANITIZE_COVERAGE in args.build_flags: + if args.collect_per_test_coverage and BuildFlags.SANITIZE_COVERAGE in args.build_flags: clickhouse_execute( args, "SYSTEM RESET COVERAGE", @@ -2363,7 +2363,7 @@ def main(args): print(f"Failed to create databases for tests: {e}") server_died.set() - if BuildFlags.SANITIZE_COVERAGE in args.build_flags: + if args.collect_per_test_coverage and BuildFlags.SANITIZE_COVERAGE in args.build_flags: clickhouse_execute( args, """ @@ -2726,6 +2726,12 @@ def parse_args(): default=False, help="Check what high-level server components were covered by tests", ) + parser.add_argument( + "--collect-per-test-coverage", + action="store_true", + default=False, + help="Create `system.coverage` table on the server and collect information about low-level code coverage on a per test basis there", + ) parser.add_argument( "--report-logs-stats", action="store_true", From 501d2106473f7963d10eb37554c42cbab7d5fe4d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 4 Nov 2023 21:04:39 +0000 Subject: [PATCH 0179/1097] Automatic style fix --- tests/clickhouse-test | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index debbb5116da..a1a270ec033 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1174,7 +1174,10 @@ class TestCase: description_full += result.description - if args.collect_per_test_coverage and BuildFlags.SANITIZE_COVERAGE in args.build_flags: + if ( + args.collect_per_test_coverage + and BuildFlags.SANITIZE_COVERAGE in args.build_flags + ): clickhouse_execute( args, f"INSERT INTO system.coverage SELECT now(), '{self.case}', coverage()", @@ -1248,7 +1251,10 @@ class TestCase: ) # We want to calculate per-test code coverage. That's why we reset it before each test. 
- if args.collect_per_test_coverage and BuildFlags.SANITIZE_COVERAGE in args.build_flags: + if ( + args.collect_per_test_coverage + and BuildFlags.SANITIZE_COVERAGE in args.build_flags + ): clickhouse_execute( args, "SYSTEM RESET COVERAGE", @@ -2363,7 +2369,10 @@ def main(args): print(f"Failed to create databases for tests: {e}") server_died.set() - if args.collect_per_test_coverage and BuildFlags.SANITIZE_COVERAGE in args.build_flags: + if ( + args.collect_per_test_coverage + and BuildFlags.SANITIZE_COVERAGE in args.build_flags + ): clickhouse_execute( args, """ From cc5179078c3c01bc797732edfabf852befcfaf2f Mon Sep 17 00:00:00 2001 From: flynn Date: Sun, 5 Nov 2023 02:29:15 +0000 Subject: [PATCH 0180/1097] remove unused code Fix --- src/Interpreters/InterpreterAlterQuery.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index c9a1bd17a46..54b4334eda9 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -105,7 +105,6 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) if (table->isStaticStorage()) throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only"); auto table_lock = table->lockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); - auto metadata_snapshot = table->getInMemoryMetadataPtr(); /// Add default database to table identifiers that we can encounter in e.g. default expressions, mutation expression, etc. AddDefaultDatabaseVisitor visitor(getContext(), table_id.getDatabaseName()); @@ -170,7 +169,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) /// Get newest metadata_snapshot after execute ALTER command, in order to /// support like materialize index in the same ALTER query that creates it. 
- metadata_snapshot = table->getInMemoryMetadataPtr(); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); if (mutation_commands.hasNonEmptyMutationCommands()) { From 8182d51978727e9e91b80764151228b9b3daf046 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Nov 2023 01:11:53 +0100 Subject: [PATCH 0181/1097] Add table `system.symbols` --- src/Storages/System/StorageSystemSymbols.cpp | 111 +++++++++++++++++++ src/Storages/System/StorageSystemSymbols.h | 33 ++++++ src/Storages/System/attachSystemTables.cpp | 2 + 3 files changed, 146 insertions(+) create mode 100644 src/Storages/System/StorageSystemSymbols.cpp create mode 100644 src/Storages/System/StorageSystemSymbols.h diff --git a/src/Storages/System/StorageSystemSymbols.cpp b/src/Storages/System/StorageSystemSymbols.cpp new file mode 100644 index 00000000000..62d8b7ca58f --- /dev/null +++ b/src/Storages/System/StorageSystemSymbols.cpp @@ -0,0 +1,111 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + + +StorageSystemSymbols::StorageSystemSymbols(const StorageID & table_id_) + : IStorage(table_id_) +{ + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(ColumnsDescription( + { + {"symbol", std::make_shared()}, + {"address_begin", std::make_shared()}, + {"address_end", std::make_shared()}, + })); + setInMemoryMetadata(storage_metadata); +} + + +namespace +{ + +class SymbolsBlockSource : public ISource +{ +private: + using Iterator = std::vector::const_iterator; + Iterator it; + const Iterator end; + std::vector columns_mask; + UInt64 max_block_size; + +public: + SymbolsBlockSource( + Iterator begin_, + Iterator end_, + std::vector columns_mask_, + Block header, + UInt64 max_block_size_) + : ISource(std::move(header)) + , it(begin_), end(end_), columns_mask(std::move(columns_mask_)), max_block_size(max_block_size_) + { + } + + String getName() const override { return "Symbols"; } + +protected: + Chunk generate() override + { + if (it == end) + return {}; + + MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns(); + + size_t rows_count = 0; + while (rows_count < max_block_size && it != end) + { + size_t src_index = 0; + size_t res_index = 0; + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(it->name); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(reinterpret_cast(it->address_begin)); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(reinterpret_cast(it->address_end)); + + ++rows_count; + ++it; + } + + return Chunk(std::move(res_columns), rows_count); + } +}; + +} + + +Pipe StorageSystemSymbols::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & /* query_info */, + ContextPtr context, + QueryProcessingStage::Enum /*processed_stage*/, + const size_t max_block_size, + const size_t /*num_streams*/) +{ + context->getAccess()->checkAccess(AccessType::INTROSPECTION); + + storage_snapshot->check(column_names); + Block sample_block = storage_snapshot->metadata->getSampleBlock(); + auto [columns_mask, res_block] = getQueriedColumnsMaskAndHeader(sample_block, column_names); + + const auto & symbols = SymbolIndex::instance().symbols(); + + return Pipe(std::make_shared( + symbols.cbegin(), symbols.cend(), std::move(columns_mask), std::move(res_block), max_block_size)); +} + +} diff --git a/src/Storages/System/StorageSystemSymbols.h b/src/Storages/System/StorageSystemSymbols.h new file mode 100644 index 
00000000000..808c406b91d --- /dev/null +++ b/src/Storages/System/StorageSystemSymbols.h @@ -0,0 +1,33 @@ +#pragma once + +#include + + +namespace DB +{ + +class Context; + + +/** Implements the system table `symbols` for introspection of symbols in the ClickHouse binary. + */ +class StorageSystemSymbols final : public IStorage +{ +public: + explicit StorageSystemSymbols(const StorageID & table_id_); + + std::string getName() const override { return "SystemSymbols"; } + + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + bool isSystemStorage() const override { return true; } +}; + +} diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index ffa225fb929..56525f5b948 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -85,6 +85,7 @@ #include #include #include +#include #if USE_RDKAFKA #include @@ -151,6 +152,7 @@ void attachSystemTablesLocal(ContextPtr context, IDatabase & system_database) attach(context, system_database, "schema_inference_cache"); attach(context, system_database, "dropped_tables"); attach(context, system_database, "scheduler"); + attach(context, system_database, "symbols"); #if USE_RDKAFKA attach(context, system_database, "kafka_consumers"); #endif From 99fe7a46fcb1b9bdb3429f0c3d721c80707053fe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Nov 2023 01:12:13 +0100 Subject: [PATCH 0182/1097] Minor changes --- src/Storages/System/StorageSystemParts.cpp | 9 +++------ src/Storages/System/StorageSystemTables.cpp | 12 ++++++++---- src/Storages/System/StorageSystemTimeZones.cpp | 3 ++- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index ac38c9c97b1..d8d85725e21 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -1,25 +1,22 @@ -#include "StorageSystemParts.h" +#include #include #include #include -#include -#include #include #include #include #include #include #include -#include -#include #include -#include #include #include + namespace { + std::string_view getRemovalStateDescription(DB::DataPartRemovalState state) { switch (state) diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 715c98ee92a..d888813f6ce 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -70,7 +69,10 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) } -static ColumnPtr getFilteredDatabases(const SelectQueryInfo & query_info, ContextPtr context) +namespace +{ + +ColumnPtr getFilteredDatabases(const SelectQueryInfo & query_info, ContextPtr context) { MutableColumnPtr column = ColumnString::create(); @@ -88,7 +90,7 @@ static ColumnPtr getFilteredDatabases(const SelectQueryInfo & query_info, Contex return block.getByPosition(0).column; } -static ColumnPtr getFilteredTables(const ASTPtr & query, const ColumnPtr & filtered_databases_column, ContextPtr context) +ColumnPtr getFilteredTables(const ASTPtr & query, const ColumnPtr & filtered_databases_column, ContextPtr context) { MutableColumnPtr column = ColumnString::create(); @@ -110,7 
+112,7 @@ static ColumnPtr getFilteredTables(const ASTPtr & query, const ColumnPtr & filte /// Avoid heavy operation on tables if we only queried columns that we can get without table object. /// Otherwise it will require table initialization for Lazy database. -static bool needTable(const DatabasePtr & database, const Block & header) +bool needTable(const DatabasePtr & database, const Block & header) { if (database->getEngineName() != "Lazy") return true; @@ -602,6 +604,8 @@ private: std::string database_name; }; +} + Pipe StorageSystemTables::read( const Names & column_names, diff --git a/src/Storages/System/StorageSystemTimeZones.cpp b/src/Storages/System/StorageSystemTimeZones.cpp index dc3711812a6..e0d7d2a5c42 100644 --- a/src/Storages/System/StorageSystemTimeZones.cpp +++ b/src/Storages/System/StorageSystemTimeZones.cpp @@ -10,7 +10,8 @@ namespace DB { NamesAndTypesList StorageSystemTimeZones::getNamesAndTypes() { - return { + return + { {"time_zone", std::make_shared()}, }; } From 995e8a91e535af1de62bc6551f2186278b17699d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Nov 2023 01:12:29 +0100 Subject: [PATCH 0183/1097] Remove fat symbols --- src/Core/callOnTypeIndex.h | 9 +++++---- src/Functions/array/arrayElement.cpp | 8 ++++++-- src/Functions/castTypeToEither.h | 6 ++++-- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/Core/callOnTypeIndex.h b/src/Core/callOnTypeIndex.h index 39ce37c4c13..c6a9e4c842b 100644 --- a/src/Core/callOnTypeIndex.h +++ b/src/Core/callOnTypeIndex.h @@ -4,6 +4,7 @@ #include + namespace DB { @@ -16,7 +17,7 @@ struct TypePair template -bool callOnBasicType(TypeIndex number, F && f) +static bool NO_INLINE callOnBasicType(TypeIndex number, F && f) { if constexpr (_int) { @@ -86,7 +87,7 @@ bool callOnBasicType(TypeIndex number, F && f) /// Unroll template using TypeIndex template -inline bool callOnBasicTypes(TypeIndex type_num1, TypeIndex type_num2, F && f) +static NO_INLINE bool callOnBasicTypes(TypeIndex type_num1, TypeIndex type_num2, F && f) { if constexpr (_int) { @@ -170,7 +171,7 @@ template class DataTypeDecimal; template -bool callOnIndexAndDataType(TypeIndex number, F && f, ExtraArgs && ... args) +static NO_INLINE bool callOnIndexAndDataType(TypeIndex number, F && f, ExtraArgs && ... args) { switch (number) { @@ -219,7 +220,7 @@ bool callOnIndexAndDataType(TypeIndex number, F && f, ExtraArgs && ... 
args) } template -static bool callOnTwoTypeIndexes(TypeIndex left_type, TypeIndex right_type, F && func) +static NO_INLINE bool callOnTwoTypeIndexes(TypeIndex left_type, TypeIndex right_type, F && func) { return callOnIndexAndDataType(left_type, [&](const auto & left_types) -> bool { diff --git a/src/Functions/array/arrayElement.cpp b/src/Functions/array/arrayElement.cpp index d7c29070c91..fe4353a8878 100644 --- a/src/Functions/array/arrayElement.cpp +++ b/src/Functions/array/arrayElement.cpp @@ -31,6 +31,9 @@ namespace ErrorCodes extern const int ZERO_ARRAY_OR_TUPLE_INDEX; } +namespace +{ + namespace ArrayImpl { class NullMapBuilder; @@ -130,7 +133,6 @@ class NullMapBuilder { public: explicit operator bool() const { return src_null_map; } - bool operator!() const { return !src_null_map; } void initSource(const UInt8 * src_null_map_) { @@ -949,7 +951,7 @@ static constexpr bool areConvertibleTypes = && std::is_convertible_v); template -static bool castColumnNumeric(const IColumn * column, F && f) +static NO_INLINE bool castColumnNumeric(const IColumn * column, F && f) { return castTypeToEither< ColumnVector, @@ -1250,6 +1252,8 @@ ColumnPtr FunctionArrayElement::perform(const ColumnsWithTypeAndName & arguments return res; } +} + REGISTER_FUNCTION(ArrayElement) { diff --git a/src/Functions/castTypeToEither.h b/src/Functions/castTypeToEither.h index aa8330366f1..e2e30c455ab 100644 --- a/src/Functions/castTypeToEither.h +++ b/src/Functions/castTypeToEither.h @@ -5,15 +5,17 @@ namespace DB { + template -static bool castTypeToEither(const T * type, F && f) +static NO_INLINE bool castTypeToEither(const T * type, F && f) { return ((typeid_cast(type) && f(*typeid_cast(type))) || ...); } template -constexpr bool castTypeToEither(TypeList, const auto * type, auto && f) +static NO_INLINE bool castTypeToEither(TypeList, const auto * type, auto && f) { return ((typeid_cast(type) != nullptr && std::forward(f)(*typeid_cast(type))) || ...); } + } From 9976006ee6700e7f4aa08b285889051ac726a99b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Nov 2023 01:17:42 +0100 Subject: [PATCH 0184/1097] Add a test --- tests/queries/0_stateless/02911_system_symbols.reference | 1 + tests/queries/0_stateless/02911_system_symbols.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 tests/queries/0_stateless/02911_system_symbols.reference create mode 100644 tests/queries/0_stateless/02911_system_symbols.sql diff --git a/tests/queries/0_stateless/02911_system_symbols.reference b/tests/queries/0_stateless/02911_system_symbols.reference new file mode 100644 index 00000000000..df30df3ce57 --- /dev/null +++ b/tests/queries/0_stateless/02911_system_symbols.reference @@ -0,0 +1 @@ +DB::StorageSystemSymbols::StorageSystemSymbols(DB::StorageID const&) diff --git a/tests/queries/0_stateless/02911_system_symbols.sql b/tests/queries/0_stateless/02911_system_symbols.sql new file mode 100644 index 00000000000..398ce77fbd4 --- /dev/null +++ b/tests/queries/0_stateless/02911_system_symbols.sql @@ -0,0 +1 @@ +SELECT demangle(symbol) AS x FROM system.symbols WHERE symbol LIKE '%StorageSystemSymbols%' ORDER BY x LIMIT 1 SETTINGS allow_introspection_functions = 1; From 2d1351c3a63e863e18c327ba1813729690724c3d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Nov 2023 01:34:06 +0100 Subject: [PATCH 0185/1097] Remove useless header --- .../AggregateFunctionSumMap.cpp | 645 ++++++++++++++++- .../AggregateFunctionSumMap.h | 656 ------------------ src/Functions/if.cpp | 12 +- 3 files changed, 647 insertions(+), 666 
deletions(-) delete mode 100644 src/AggregateFunctions/AggregateFunctionSumMap.h diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.cpp b/src/AggregateFunctions/AggregateFunctionSumMap.cpp index 8204db3ff10..ecc8a978388 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.cpp +++ b/src/AggregateFunctions/AggregateFunctionSumMap.cpp @@ -1,9 +1,23 @@ #include -#include #include -#include #include -#include + +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include namespace DB @@ -12,13 +26,636 @@ struct Settings; namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int LOGICAL_ERROR; } namespace { +template +struct AggregateFunctionMapData +{ + // Map needs to be ordered to maintain function properties + std::map merged_maps; +}; + +/** Aggregate function, that takes at least two arguments: keys and values, and as a result, builds a tuple of at least 2 arrays - + * ordered keys and variable number of argument values aggregated by corresponding keys. + * + * sumMap function is the most useful when using SummingMergeTree to sum Nested columns, which name ends in "Map". + * + * Example: sumMap(k, v...) of: + * k v + * [1,2,3] [10,10,10] + * [3,4,5] [10,10,10] + * [4,5,6] [10,10,10] + * [6,7,8] [10,10,10] + * [7,5,3] [5,15,25] + * [8,9,10] [20,20,20] + * will return: + * ([1,2,3,4,5,6,7,8,9,10],[10,10,45,20,35,20,15,30,20,20]) + * + * minMap and maxMap share the same idea, but calculate min and max correspondingly. + * + * NOTE: The implementation of these functions are "amateur grade" - not efficient and low quality. 
+ */ + +template +class AggregateFunctionMapBase : public IAggregateFunctionDataHelper< + AggregateFunctionMapData>, Derived> +{ +private: + static constexpr auto STATE_VERSION_1_MIN_REVISION = 54452; + + DataTypePtr keys_type; + SerializationPtr keys_serialization; + DataTypes values_types; + Serializations values_serializations; + Serializations promoted_values_serializations; + +public: + using Base = IAggregateFunctionDataHelper< + AggregateFunctionMapData>, Derived>; + + AggregateFunctionMapBase(const DataTypePtr & keys_type_, + const DataTypes & values_types_, const DataTypes & argument_types_) + : Base(argument_types_, {} /* parameters */, createResultType(keys_type_, values_types_, getName())) + , keys_type(keys_type_) + , keys_serialization(keys_type->getDefaultSerialization()) + , values_types(values_types_) + { + values_serializations.reserve(values_types.size()); + promoted_values_serializations.reserve(values_types.size()); + for (const auto & type : values_types) + { + values_serializations.emplace_back(type->getDefaultSerialization()); + if (type->canBePromoted()) + { + if (type->isNullable()) + promoted_values_serializations.emplace_back( + makeNullable(removeNullable(type)->promoteNumericType())->getDefaultSerialization()); + else + promoted_values_serializations.emplace_back(type->promoteNumericType()->getDefaultSerialization()); + } + else + { + promoted_values_serializations.emplace_back(type->getDefaultSerialization()); + } + } + } + + bool isVersioned() const override { return true; } + + size_t getDefaultVersion() const override { return 1; } + + size_t getVersionFromRevision(size_t revision) const override + { + if (revision >= STATE_VERSION_1_MIN_REVISION) + return 1; + else + return 0; + } + + static DataTypePtr createResultType( + const DataTypePtr & keys_type_, + const DataTypes & values_types_, + const String & name_) + { + DataTypes types; + types.emplace_back(std::make_shared(keys_type_)); + + for (const auto & value_type : values_types_) + { + if constexpr (std::is_same_v) + { + if (!value_type->isSummable()) + throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Values for {} cannot be summed, passed type {}", + name_, value_type->getName()}; + } + + DataTypePtr result_type; + + if constexpr (overflow) + { + if (value_type->onlyNull()) + throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Cannot calculate {} of type {}", + name_, value_type->getName()}; + + // Overflow, meaning that the returned type is the same as + // the input type. Nulls are skipped. + result_type = removeNullable(value_type); + } + else + { + auto value_type_without_nullable = removeNullable(value_type); + + // No overflow, meaning we promote the types if necessary. 
+ if (!value_type_without_nullable->canBePromoted()) + throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Values for {} are expected to be Numeric, Float or Decimal, passed type {}", + name_, value_type->getName()}; + + WhichDataType value_type_to_check(value_type_without_nullable); + + /// Do not promote decimal because of implementation issues of this function design + /// Currently we cannot get result column type in case of decimal we cannot get decimal scale + /// in method void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override + /// If we decide to make this function more efficient we should promote decimal type during summ + if (value_type_to_check.isDecimal()) + result_type = value_type_without_nullable; + else + result_type = value_type_without_nullable->promoteNumericType(); + } + + types.emplace_back(std::make_shared(result_type)); + } + + return std::make_shared(types); + } + + bool allocatesMemoryInArena() const override { return false; } + + static const auto & getArgumentColumns(const IColumn**& columns) + { + if constexpr (tuple_argument) + { + return assert_cast(columns[0])->getColumns(); + } + else + { + return columns; + } + } + + void add(AggregateDataPtr __restrict place, const IColumn ** columns_, const size_t row_num, Arena *) const override + { + const auto & columns = getArgumentColumns(columns_); + + // Column 0 contains array of keys of known type + const ColumnArray & array_column0 = assert_cast(*columns[0]); + const IColumn::Offsets & offsets0 = array_column0.getOffsets(); + const IColumn & key_column = array_column0.getData(); + const size_t keys_vec_offset = offsets0[row_num - 1]; + const size_t keys_vec_size = (offsets0[row_num] - keys_vec_offset); + + // Columns 1..n contain arrays of numeric values to sum + auto & merged_maps = this->data(place).merged_maps; + for (size_t col = 0, size = values_types.size(); col < size; ++col) + { + const auto & array_column = assert_cast(*columns[col + 1]); + const IColumn & value_column = array_column.getData(); + const IColumn::Offsets & offsets = array_column.getOffsets(); + const size_t values_vec_offset = offsets[row_num - 1]; + const size_t values_vec_size = (offsets[row_num] - values_vec_offset); + + // Expect key and value arrays to be of same length + if (keys_vec_size != values_vec_size) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sizes of keys and values arrays do not match"); + + // Insert column values for all keys + for (size_t i = 0; i < keys_vec_size; ++i) + { + auto value = value_column[values_vec_offset + i]; + T key = static_cast(key_column[keys_vec_offset + i].get()); + + if (!keepKey(key)) + continue; + + decltype(merged_maps.begin()) it; + if constexpr (is_decimal) + { + // FIXME why is storing NearestFieldType not enough, and we + // have to check for decimals again here? 
+ UInt32 scale = static_cast &>(key_column).getScale(); + it = merged_maps.find(DecimalField(key, scale)); + } + else + it = merged_maps.find(key); + + if (it != merged_maps.end()) + { + if (!value.isNull()) + { + if (it->second[col].isNull()) + it->second[col] = value; + else + applyVisitor(Visitor(value), it->second[col]); + } + } + else + { + // Create a value array for this key + Array new_values; + new_values.resize(size); + new_values[col] = value; + + if constexpr (is_decimal) + { + UInt32 scale = static_cast &>(key_column).getScale(); + merged_maps.emplace(DecimalField(key, scale), std::move(new_values)); + } + else + { + merged_maps.emplace(key, std::move(new_values)); + } + } + } + } + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override + { + auto & merged_maps = this->data(place).merged_maps; + const auto & rhs_maps = this->data(rhs).merged_maps; + + for (const auto & elem : rhs_maps) + { + const auto & it = merged_maps.find(elem.first); + if (it != merged_maps.end()) + { + for (size_t col = 0; col < values_types.size(); ++col) + if (!elem.second[col].isNull()) + applyVisitor(Visitor(elem.second[col]), it->second[col]); + } + else + merged_maps[elem.first] = elem.second; + } + } + + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional version) const override + { + if (!version) + version = getDefaultVersion(); + + const auto & merged_maps = this->data(place).merged_maps; + size_t size = merged_maps.size(); + writeVarUInt(size, buf); + + std::function serialize; + switch (*version) + { + case 0: + { + serialize = [&](size_t col_idx, const Array & values){ values_serializations[col_idx]->serializeBinary(values[col_idx], buf, {}); }; + break; + } + case 1: + { + serialize = [&](size_t col_idx, const Array & values){ promoted_values_serializations[col_idx]->serializeBinary(values[col_idx], buf, {}); }; + break; + } + } + + for (const auto & elem : merged_maps) + { + keys_serialization->serializeBinary(elem.first, buf, {}); + for (size_t col = 0; col < values_types.size(); ++col) + serialize(col, elem.second); + } + } + + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional version, Arena *) const override + { + if (!version) + version = getDefaultVersion(); + + auto & merged_maps = this->data(place).merged_maps; + size_t size = 0; + readVarUInt(size, buf); + + std::function deserialize; + switch (*version) + { + case 0: + { + deserialize = [&](size_t col_idx, Array & values){ values_serializations[col_idx]->deserializeBinary(values[col_idx], buf, {}); }; + break; + } + case 1: + { + deserialize = [&](size_t col_idx, Array & values){ promoted_values_serializations[col_idx]->deserializeBinary(values[col_idx], buf, {}); }; + break; + } + } + + for (size_t i = 0; i < size; ++i) + { + Field key; + keys_serialization->deserializeBinary(key, buf, {}); + + Array values; + values.resize(values_types.size()); + + for (size_t col = 0; col < values_types.size(); ++col) + deserialize(col, values); + + if constexpr (is_decimal) + merged_maps[key.get>()] = values; + else + merged_maps[key.get()] = values; + } + } + + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override + { + size_t num_columns = values_types.size(); + + // Final step does compaction of keys that have zero values, this mutates the state + auto & merged_maps = this->data(place).merged_maps; + + // Remove keys which are zeros or empty. This should be enabled only for sumMap. 
+ if constexpr (compact) + { + for (auto it = merged_maps.cbegin(); it != merged_maps.cend();) + { + // Key is not compacted if it has at least one non-zero value + bool erase = true; + for (size_t col = 0; col < num_columns; ++col) + { + if (!it->second[col].isNull() && it->second[col] != values_types[col]->getDefault()) + { + erase = false; + break; + } + } + + if (erase) + it = merged_maps.erase(it); + else + ++it; + } + } + + size_t size = merged_maps.size(); + + auto & to_tuple = assert_cast(to); + auto & to_keys_arr = assert_cast(to_tuple.getColumn(0)); + auto & to_keys_col = to_keys_arr.getData(); + + // Advance column offsets + auto & to_keys_offsets = to_keys_arr.getOffsets(); + to_keys_offsets.push_back(to_keys_offsets.back() + size); + to_keys_col.reserve(size); + + for (size_t col = 0; col < num_columns; ++col) + { + auto & to_values_arr = assert_cast(to_tuple.getColumn(col + 1)); + auto & to_values_offsets = to_values_arr.getOffsets(); + to_values_offsets.push_back(to_values_offsets.back() + size); + to_values_arr.getData().reserve(size); + } + + // Write arrays of keys and values + for (const auto & elem : merged_maps) + { + // Write array of keys into column + to_keys_col.insert(elem.first); + + // Write 0..n arrays of values + for (size_t col = 0; col < num_columns; ++col) + { + auto & to_values_col = assert_cast(to_tuple.getColumn(col + 1)).getData(); + if (elem.second[col].isNull()) + to_values_col.insertDefault(); + else + to_values_col.insert(elem.second[col]); + } + } + } + + bool keepKey(const T & key) const { return static_cast(*this).keepKey(key); } + String getName() const override { return Derived::getNameImpl(); } +}; + +template +class AggregateFunctionSumMap final : + public AggregateFunctionMapBase, FieldVisitorSum, overflow, tuple_argument, true> +{ +private: + using Self = AggregateFunctionSumMap; + using Base = AggregateFunctionMapBase; + +public: + AggregateFunctionSumMap(const DataTypePtr & keys_type_, + DataTypes & values_types_, const DataTypes & argument_types_, + const Array & params_) + : Base{keys_type_, values_types_, argument_types_} + { + // The constructor accepts parameters to have a uniform interface with + // sumMapFiltered, but this function doesn't have any parameters. 
+ assertNoParameters(getNameImpl(), params_); + } + + static String getNameImpl() + { + if constexpr (overflow) + { + return "sumMapWithOverflow"; + } + else + { + return "sumMap"; + } + } + + bool keepKey(const T &) const { return true; } +}; + + +template +class AggregateFunctionSumMapFiltered final : + public AggregateFunctionMapBase, + FieldVisitorSum, + overflow, + tuple_argument, + true> +{ +private: + using Self = AggregateFunctionSumMapFiltered; + using Base = AggregateFunctionMapBase; + + using ContainerT = std::unordered_set; + + ContainerT keys_to_keep; + +public: + AggregateFunctionSumMapFiltered(const DataTypePtr & keys_type_, + const DataTypes & values_types_, const DataTypes & argument_types_, + const Array & params_) + : Base{keys_type_, values_types_, argument_types_} + { + if (params_.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Aggregate function '{}' requires exactly one parameter " + "of Array type", getNameImpl()); + + Array keys_to_keep_values; + if (!params_.front().tryGet(keys_to_keep_values)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Aggregate function {} requires an Array as a parameter", + getNameImpl()); + + this->parameters = params_; + + keys_to_keep.reserve(keys_to_keep_values.size()); + + for (const Field & f : keys_to_keep_values) + keys_to_keep.emplace(f.safeGet()); + } + + static String getNameImpl() + { + if constexpr (overflow) + { + return "sumMapFilteredWithOverflow"; + } + else + { + return "sumMapFiltered"; + } + } + + bool keepKey(const T & key) const { return keys_to_keep.count(key); } +}; + + +/** Implements `Max` operation. + * Returns true if changed + */ +class FieldVisitorMax : public StaticVisitor +{ +private: + const Field & rhs; + + template + bool compareImpl(FieldType & x) const + { + auto val = rhs.get(); + if (val > x) + { + x = val; + return true; + } + + return false; + } + +public: + explicit FieldVisitorMax(const Field & rhs_) : rhs(rhs_) {} + + bool operator() (Null &) const + { + /// Do not update current value, skip nulls + return false; + } + + bool operator() (AggregateFunctionStateData &) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot compare AggregateFunctionStates"); } + + bool operator() (Array & x) const { return compareImpl(x); } + bool operator() (Tuple & x) const { return compareImpl(x); } + template + bool operator() (DecimalField & x) const { return compareImpl>(x); } + template + bool operator() (T & x) const { return compareImpl(x); } +}; + +/** Implements `Min` operation. 
+ * Returns true if changed + */ +class FieldVisitorMin : public StaticVisitor +{ +private: + const Field & rhs; + + template + bool compareImpl(FieldType & x) const + { + auto val = rhs.get(); + if (val < x) + { + x = val; + return true; + } + + return false; + } + +public: + explicit FieldVisitorMin(const Field & rhs_) : rhs(rhs_) {} + + + bool operator() (Null &) const + { + /// Do not update current value, skip nulls + return false; + } + + bool operator() (AggregateFunctionStateData &) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot sum AggregateFunctionStates"); } + + bool operator() (Array & x) const { return compareImpl(x); } + bool operator() (Tuple & x) const { return compareImpl(x); } + template + bool operator() (DecimalField & x) const { return compareImpl>(x); } + template + bool operator() (T & x) const { return compareImpl(x); } +}; + + +template +class AggregateFunctionMinMap final : + public AggregateFunctionMapBase, FieldVisitorMin, true, tuple_argument, false> +{ +private: + using Self = AggregateFunctionMinMap; + using Base = AggregateFunctionMapBase; + +public: + AggregateFunctionMinMap(const DataTypePtr & keys_type_, + DataTypes & values_types_, const DataTypes & argument_types_, + const Array & params_) + : Base{keys_type_, values_types_, argument_types_} + { + // The constructor accepts parameters to have a uniform interface with + // sumMapFiltered, but this function doesn't have any parameters. + assertNoParameters(getNameImpl(), params_); + } + + static String getNameImpl() { return "minMap"; } + + bool keepKey(const T &) const { return true; } +}; + +template +class AggregateFunctionMaxMap final : + public AggregateFunctionMapBase, FieldVisitorMax, true, tuple_argument, false> +{ +private: + using Self = AggregateFunctionMaxMap; + using Base = AggregateFunctionMapBase; + +public: + AggregateFunctionMaxMap(const DataTypePtr & keys_type_, + DataTypes & values_types_, const DataTypes & argument_types_, + const Array & params_) + : Base{keys_type_, values_types_, argument_types_} + { + // The constructor accepts parameters to have a uniform interface with + // sumMapFiltered, but this function doesn't have any parameters. + assertNoParameters(getNameImpl(), params_); + } + + static String getNameImpl() { return "maxMap"; } + + bool keepKey(const T &) const { return true; } +}; + + auto parseArguments(const std::string & name, const DataTypes & arguments) { DataTypes args; diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h deleted file mode 100644 index b30f5ff5220..00000000000 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ /dev/null @@ -1,656 +0,0 @@ -#pragma once - -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - - -namespace DB -{ -struct Settings; - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int LOGICAL_ERROR; -} - -template -struct AggregateFunctionMapData -{ - // Map needs to be ordered to maintain function properties - std::map merged_maps; -}; - -/** Aggregate function, that takes at least two arguments: keys and values, and as a result, builds a tuple of at least 2 arrays - - * ordered keys and variable number of argument values aggregated by corresponding keys. 
- * - * sumMap function is the most useful when using SummingMergeTree to sum Nested columns, which name ends in "Map". - * - * Example: sumMap(k, v...) of: - * k v - * [1,2,3] [10,10,10] - * [3,4,5] [10,10,10] - * [4,5,6] [10,10,10] - * [6,7,8] [10,10,10] - * [7,5,3] [5,15,25] - * [8,9,10] [20,20,20] - * will return: - * ([1,2,3,4,5,6,7,8,9,10],[10,10,45,20,35,20,15,30,20,20]) - * - * minMap and maxMap share the same idea, but calculate min and max correspondingly. - * - * NOTE: The implementation of these functions are "amateur grade" - not efficient and low quality. - */ - -template -class AggregateFunctionMapBase : public IAggregateFunctionDataHelper< - AggregateFunctionMapData>, Derived> -{ -private: - static constexpr auto STATE_VERSION_1_MIN_REVISION = 54452; - - DataTypePtr keys_type; - SerializationPtr keys_serialization; - DataTypes values_types; - Serializations values_serializations; - Serializations promoted_values_serializations; - -public: - using Base = IAggregateFunctionDataHelper< - AggregateFunctionMapData>, Derived>; - - AggregateFunctionMapBase(const DataTypePtr & keys_type_, - const DataTypes & values_types_, const DataTypes & argument_types_) - : Base(argument_types_, {} /* parameters */, createResultType(keys_type_, values_types_, getName())) - , keys_type(keys_type_) - , keys_serialization(keys_type->getDefaultSerialization()) - , values_types(values_types_) - { - values_serializations.reserve(values_types.size()); - promoted_values_serializations.reserve(values_types.size()); - for (const auto & type : values_types) - { - values_serializations.emplace_back(type->getDefaultSerialization()); - if (type->canBePromoted()) - { - if (type->isNullable()) - promoted_values_serializations.emplace_back( - makeNullable(removeNullable(type)->promoteNumericType())->getDefaultSerialization()); - else - promoted_values_serializations.emplace_back(type->promoteNumericType()->getDefaultSerialization()); - } - else - { - promoted_values_serializations.emplace_back(type->getDefaultSerialization()); - } - } - } - - bool isVersioned() const override { return true; } - - size_t getDefaultVersion() const override { return 1; } - - size_t getVersionFromRevision(size_t revision) const override - { - if (revision >= STATE_VERSION_1_MIN_REVISION) - return 1; - else - return 0; - } - - static DataTypePtr createResultType( - const DataTypePtr & keys_type_, - const DataTypes & values_types_, - const String & name_) - { - DataTypes types; - types.emplace_back(std::make_shared(keys_type_)); - - for (const auto & value_type : values_types_) - { - if constexpr (std::is_same_v) - { - if (!value_type->isSummable()) - throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Values for {} cannot be summed, passed type {}", - name_, value_type->getName()}; - } - - DataTypePtr result_type; - - if constexpr (overflow) - { - if (value_type->onlyNull()) - throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Cannot calculate {} of type {}", - name_, value_type->getName()}; - - // Overflow, meaning that the returned type is the same as - // the input type. Nulls are skipped. - result_type = removeNullable(value_type); - } - else - { - auto value_type_without_nullable = removeNullable(value_type); - - // No overflow, meaning we promote the types if necessary. 
- if (!value_type_without_nullable->canBePromoted()) - throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Values for {} are expected to be Numeric, Float or Decimal, passed type {}", - name_, value_type->getName()}; - - WhichDataType value_type_to_check(value_type_without_nullable); - - /// Do not promote decimal because of implementation issues of this function design - /// Currently we cannot get result column type in case of decimal we cannot get decimal scale - /// in method void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override - /// If we decide to make this function more efficient we should promote decimal type during summ - if (value_type_to_check.isDecimal()) - result_type = value_type_without_nullable; - else - result_type = value_type_without_nullable->promoteNumericType(); - } - - types.emplace_back(std::make_shared(result_type)); - } - - return std::make_shared(types); - } - - bool allocatesMemoryInArena() const override { return false; } - - static const auto & getArgumentColumns(const IColumn**& columns) - { - if constexpr (tuple_argument) - { - return assert_cast(columns[0])->getColumns(); - } - else - { - return columns; - } - } - - void add(AggregateDataPtr __restrict place, const IColumn ** columns_, const size_t row_num, Arena *) const override - { - const auto & columns = getArgumentColumns(columns_); - - // Column 0 contains array of keys of known type - const ColumnArray & array_column0 = assert_cast(*columns[0]); - const IColumn::Offsets & offsets0 = array_column0.getOffsets(); - const IColumn & key_column = array_column0.getData(); - const size_t keys_vec_offset = offsets0[row_num - 1]; - const size_t keys_vec_size = (offsets0[row_num] - keys_vec_offset); - - // Columns 1..n contain arrays of numeric values to sum - auto & merged_maps = this->data(place).merged_maps; - for (size_t col = 0, size = values_types.size(); col < size; ++col) - { - const auto & array_column = assert_cast(*columns[col + 1]); - const IColumn & value_column = array_column.getData(); - const IColumn::Offsets & offsets = array_column.getOffsets(); - const size_t values_vec_offset = offsets[row_num - 1]; - const size_t values_vec_size = (offsets[row_num] - values_vec_offset); - - // Expect key and value arrays to be of same length - if (keys_vec_size != values_vec_size) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sizes of keys and values arrays do not match"); - - // Insert column values for all keys - for (size_t i = 0; i < keys_vec_size; ++i) - { - auto value = value_column[values_vec_offset + i]; - T key = static_cast(key_column[keys_vec_offset + i].get()); - - if (!keepKey(key)) - continue; - - decltype(merged_maps.begin()) it; - if constexpr (is_decimal) - { - // FIXME why is storing NearestFieldType not enough, and we - // have to check for decimals again here? 
- UInt32 scale = static_cast &>(key_column).getScale(); - it = merged_maps.find(DecimalField(key, scale)); - } - else - it = merged_maps.find(key); - - if (it != merged_maps.end()) - { - if (!value.isNull()) - { - if (it->second[col].isNull()) - it->second[col] = value; - else - applyVisitor(Visitor(value), it->second[col]); - } - } - else - { - // Create a value array for this key - Array new_values; - new_values.resize(size); - new_values[col] = value; - - if constexpr (is_decimal) - { - UInt32 scale = static_cast &>(key_column).getScale(); - merged_maps.emplace(DecimalField(key, scale), std::move(new_values)); - } - else - { - merged_maps.emplace(key, std::move(new_values)); - } - } - } - } - } - - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override - { - auto & merged_maps = this->data(place).merged_maps; - const auto & rhs_maps = this->data(rhs).merged_maps; - - for (const auto & elem : rhs_maps) - { - const auto & it = merged_maps.find(elem.first); - if (it != merged_maps.end()) - { - for (size_t col = 0; col < values_types.size(); ++col) - if (!elem.second[col].isNull()) - applyVisitor(Visitor(elem.second[col]), it->second[col]); - } - else - merged_maps[elem.first] = elem.second; - } - } - - void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional version) const override - { - if (!version) - version = getDefaultVersion(); - - const auto & merged_maps = this->data(place).merged_maps; - size_t size = merged_maps.size(); - writeVarUInt(size, buf); - - std::function serialize; - switch (*version) - { - case 0: - { - serialize = [&](size_t col_idx, const Array & values){ values_serializations[col_idx]->serializeBinary(values[col_idx], buf, {}); }; - break; - } - case 1: - { - serialize = [&](size_t col_idx, const Array & values){ promoted_values_serializations[col_idx]->serializeBinary(values[col_idx], buf, {}); }; - break; - } - } - - for (const auto & elem : merged_maps) - { - keys_serialization->serializeBinary(elem.first, buf, {}); - for (size_t col = 0; col < values_types.size(); ++col) - serialize(col, elem.second); - } - } - - void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional version, Arena *) const override - { - if (!version) - version = getDefaultVersion(); - - auto & merged_maps = this->data(place).merged_maps; - size_t size = 0; - readVarUInt(size, buf); - - std::function deserialize; - switch (*version) - { - case 0: - { - deserialize = [&](size_t col_idx, Array & values){ values_serializations[col_idx]->deserializeBinary(values[col_idx], buf, {}); }; - break; - } - case 1: - { - deserialize = [&](size_t col_idx, Array & values){ promoted_values_serializations[col_idx]->deserializeBinary(values[col_idx], buf, {}); }; - break; - } - } - - for (size_t i = 0; i < size; ++i) - { - Field key; - keys_serialization->deserializeBinary(key, buf, {}); - - Array values; - values.resize(values_types.size()); - - for (size_t col = 0; col < values_types.size(); ++col) - deserialize(col, values); - - if constexpr (is_decimal) - merged_maps[key.get>()] = values; - else - merged_maps[key.get()] = values; - } - } - - void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override - { - size_t num_columns = values_types.size(); - - // Final step does compaction of keys that have zero values, this mutates the state - auto & merged_maps = this->data(place).merged_maps; - - // Remove keys which are zeros or empty. This should be enabled only for sumMap. 
- if constexpr (compact) - { - for (auto it = merged_maps.cbegin(); it != merged_maps.cend();) - { - // Key is not compacted if it has at least one non-zero value - bool erase = true; - for (size_t col = 0; col < num_columns; ++col) - { - if (!it->second[col].isNull() && it->second[col] != values_types[col]->getDefault()) - { - erase = false; - break; - } - } - - if (erase) - it = merged_maps.erase(it); - else - ++it; - } - } - - size_t size = merged_maps.size(); - - auto & to_tuple = assert_cast(to); - auto & to_keys_arr = assert_cast(to_tuple.getColumn(0)); - auto & to_keys_col = to_keys_arr.getData(); - - // Advance column offsets - auto & to_keys_offsets = to_keys_arr.getOffsets(); - to_keys_offsets.push_back(to_keys_offsets.back() + size); - to_keys_col.reserve(size); - - for (size_t col = 0; col < num_columns; ++col) - { - auto & to_values_arr = assert_cast(to_tuple.getColumn(col + 1)); - auto & to_values_offsets = to_values_arr.getOffsets(); - to_values_offsets.push_back(to_values_offsets.back() + size); - to_values_arr.getData().reserve(size); - } - - // Write arrays of keys and values - for (const auto & elem : merged_maps) - { - // Write array of keys into column - to_keys_col.insert(elem.first); - - // Write 0..n arrays of values - for (size_t col = 0; col < num_columns; ++col) - { - auto & to_values_col = assert_cast(to_tuple.getColumn(col + 1)).getData(); - if (elem.second[col].isNull()) - to_values_col.insertDefault(); - else - to_values_col.insert(elem.second[col]); - } - } - } - - bool keepKey(const T & key) const { return static_cast(*this).keepKey(key); } - String getName() const override { return Derived::getNameImpl(); } -}; - -template -class AggregateFunctionSumMap final : - public AggregateFunctionMapBase, FieldVisitorSum, overflow, tuple_argument, true> -{ -private: - using Self = AggregateFunctionSumMap; - using Base = AggregateFunctionMapBase; - -public: - AggregateFunctionSumMap(const DataTypePtr & keys_type_, - DataTypes & values_types_, const DataTypes & argument_types_, - const Array & params_) - : Base{keys_type_, values_types_, argument_types_} - { - // The constructor accepts parameters to have a uniform interface with - // sumMapFiltered, but this function doesn't have any parameters. 
- assertNoParameters(getNameImpl(), params_); - } - - static String getNameImpl() - { - if constexpr (overflow) - { - return "sumMapWithOverflow"; - } - else - { - return "sumMap"; - } - } - - bool keepKey(const T &) const { return true; } -}; - - -template -class AggregateFunctionSumMapFiltered final : - public AggregateFunctionMapBase, - FieldVisitorSum, - overflow, - tuple_argument, - true> -{ -private: - using Self = AggregateFunctionSumMapFiltered; - using Base = AggregateFunctionMapBase; - - using ContainerT = std::unordered_set; - - ContainerT keys_to_keep; - -public: - AggregateFunctionSumMapFiltered(const DataTypePtr & keys_type_, - const DataTypes & values_types_, const DataTypes & argument_types_, - const Array & params_) - : Base{keys_type_, values_types_, argument_types_} - { - if (params_.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Aggregate function '{}' requires exactly one parameter " - "of Array type", getNameImpl()); - - Array keys_to_keep_values; - if (!params_.front().tryGet(keys_to_keep_values)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Aggregate function {} requires an Array as a parameter", - getNameImpl()); - - this->parameters = params_; - - keys_to_keep.reserve(keys_to_keep_values.size()); - - for (const Field & f : keys_to_keep_values) - keys_to_keep.emplace(f.safeGet()); - } - - static String getNameImpl() - { - if constexpr (overflow) - { - return "sumMapFilteredWithOverflow"; - } - else - { - return "sumMapFiltered"; - } - } - - bool keepKey(const T & key) const { return keys_to_keep.count(key); } -}; - - -/** Implements `Max` operation. - * Returns true if changed - */ -class FieldVisitorMax : public StaticVisitor -{ -private: - const Field & rhs; - - template - bool compareImpl(FieldType & x) const - { - auto val = rhs.get(); - if (val > x) - { - x = val; - return true; - } - - return false; - } - -public: - explicit FieldVisitorMax(const Field & rhs_) : rhs(rhs_) {} - - bool operator() (Null &) const - { - /// Do not update current value, skip nulls - return false; - } - - bool operator() (AggregateFunctionStateData &) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot compare AggregateFunctionStates"); } - - bool operator() (Array & x) const { return compareImpl(x); } - bool operator() (Tuple & x) const { return compareImpl(x); } - template - bool operator() (DecimalField & x) const { return compareImpl>(x); } - template - bool operator() (T & x) const { return compareImpl(x); } -}; - -/** Implements `Min` operation. 
- * Returns true if changed - */ -class FieldVisitorMin : public StaticVisitor -{ -private: - const Field & rhs; - - template - bool compareImpl(FieldType & x) const - { - auto val = rhs.get(); - if (val < x) - { - x = val; - return true; - } - - return false; - } - -public: - explicit FieldVisitorMin(const Field & rhs_) : rhs(rhs_) {} - - - bool operator() (Null &) const - { - /// Do not update current value, skip nulls - return false; - } - - bool operator() (AggregateFunctionStateData &) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot sum AggregateFunctionStates"); } - - bool operator() (Array & x) const { return compareImpl(x); } - bool operator() (Tuple & x) const { return compareImpl(x); } - template - bool operator() (DecimalField & x) const { return compareImpl>(x); } - template - bool operator() (T & x) const { return compareImpl(x); } -}; - - -template -class AggregateFunctionMinMap final : - public AggregateFunctionMapBase, FieldVisitorMin, true, tuple_argument, false> -{ -private: - using Self = AggregateFunctionMinMap; - using Base = AggregateFunctionMapBase; - -public: - AggregateFunctionMinMap(const DataTypePtr & keys_type_, - DataTypes & values_types_, const DataTypes & argument_types_, - const Array & params_) - : Base{keys_type_, values_types_, argument_types_} - { - // The constructor accepts parameters to have a uniform interface with - // sumMapFiltered, but this function doesn't have any parameters. - assertNoParameters(getNameImpl(), params_); - } - - static String getNameImpl() { return "minMap"; } - - bool keepKey(const T &) const { return true; } -}; - -template -class AggregateFunctionMaxMap final : - public AggregateFunctionMapBase, FieldVisitorMax, true, tuple_argument, false> -{ -private: - using Self = AggregateFunctionMaxMap; - using Base = AggregateFunctionMapBase; - -public: - AggregateFunctionMaxMap(const DataTypePtr & keys_type_, - DataTypes & values_types_, const DataTypes & argument_types_, - const Array & params_) - : Base{keys_type_, values_types_, argument_types_} - { - // The constructor accepts parameters to have a uniform interface with - // sumMapFiltered, but this function doesn't have any parameters. 
- assertNoParameters(getNameImpl(), params_); - } - - static String getNameImpl() { return "maxMap"; } - - bool keepKey(const T &) const { return true; } -}; - -} diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index eba1733c683..983825a0e68 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -234,7 +234,7 @@ private: } template - ColumnPtr executeRightType( + NO_INLINE ColumnPtr executeRightType( [[maybe_unused]] const ColumnUInt8 * cond_col, [[maybe_unused]] const ColumnsWithTypeAndName & arguments, [[maybe_unused]] const ColVecT0 * col_left) const @@ -266,7 +266,7 @@ private: } template - ColumnPtr executeConstRightType( + NO_INLINE ColumnPtr executeConstRightType( [[maybe_unused]] const ColumnUInt8 * cond_col, [[maybe_unused]] const ColumnsWithTypeAndName & arguments, [[maybe_unused]] const ColumnConst * col_left) const @@ -298,7 +298,7 @@ private: } template - ColumnPtr executeRightTypeArray( + NO_INLINE ColumnPtr executeRightTypeArray( [[maybe_unused]] const ColumnUInt8 * cond_col, [[maybe_unused]] const ColumnsWithTypeAndName & arguments, [[maybe_unused]] const DataTypePtr result_type, @@ -355,7 +355,7 @@ private: } template - ColumnPtr executeConstRightTypeArray( + NO_INLINE ColumnPtr executeConstRightTypeArray( [[maybe_unused]] const ColumnUInt8 * cond_col, [[maybe_unused]] const ColumnsWithTypeAndName & arguments, [[maybe_unused]] const DataTypePtr & result_type, @@ -413,7 +413,7 @@ private: } template - ColumnPtr executeTyped( + NO_INLINE ColumnPtr executeTyped( const ColumnUInt8 * cond_col, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const { using ColVecT0 = ColumnVectorOrDecimal; @@ -1086,7 +1086,7 @@ public: throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. 
" "Must be ColumnUInt8 or ColumnConstUInt8.", arg_cond.column->getName(), getName()); - auto call = [&](const auto & types) -> bool + auto call = [&](const auto & types) NO_INLINE -> bool { using Types = std::decay_t; using T0 = typename Types::LeftType; From 99d90a1430437319ceab57e56c6bbd7cf594ede2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Nov 2023 01:58:20 +0100 Subject: [PATCH 0186/1097] Attempt to remove garbage --- .../AggregateFunctionSumMap.cpp | 50 ++++++++----------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.cpp b/src/AggregateFunctions/AggregateFunctionSumMap.cpp index ecc8a978388..f89af3a0dae 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.cpp +++ b/src/AggregateFunctions/AggregateFunctionSumMap.cpp @@ -20,8 +20,19 @@ #include +namespace std +{ + template + struct hash> + { + size_t operator()(const DB::DecimalField & x) const { return hash()(x.getValue()); } + }; +} + + namespace DB { + struct Settings; namespace ErrorCodes @@ -65,7 +76,7 @@ struct AggregateFunctionMapData template class AggregateFunctionMapBase : public IAggregateFunctionDataHelper< - AggregateFunctionMapData>, Derived> + AggregateFunctionMapData, Derived> { private: static constexpr auto STATE_VERSION_1_MIN_REVISION = 54452; @@ -78,7 +89,7 @@ private: public: using Base = IAggregateFunctionDataHelper< - AggregateFunctionMapData>, Derived>; + AggregateFunctionMapData, Derived>; AggregateFunctionMapBase(const DataTypePtr & keys_type_, const DataTypes & values_types_, const DataTypes & argument_types_) @@ -227,15 +238,7 @@ public: continue; decltype(merged_maps.begin()) it; - if constexpr (is_decimal) - { - // FIXME why is storing NearestFieldType not enough, and we - // have to check for decimals again here? - UInt32 scale = static_cast &>(key_column).getScale(); - it = merged_maps.find(DecimalField(key, scale)); - } - else - it = merged_maps.find(key); + it = merged_maps.find(key); if (it != merged_maps.end()) { @@ -254,15 +257,7 @@ public: new_values.resize(size); new_values[col] = value; - if constexpr (is_decimal) - { - UInt32 scale = static_cast &>(key_column).getScale(); - merged_maps.emplace(DecimalField(key, scale), std::move(new_values)); - } - else - { - merged_maps.emplace(key, std::move(new_values)); - } + merged_maps.emplace(key, std::move(new_values)); } } } @@ -354,10 +349,7 @@ public: for (size_t col = 0; col < values_types.size(); ++col) deserialize(col, values); - if constexpr (is_decimal) - merged_maps[key.get>()] = values; - else - merged_maps[key.get()] = values; + merged_maps[key.get()] = values; } } @@ -711,7 +703,7 @@ auto parseArguments(const std::string & name, const DataTypes & arguments) // The template parameter MappedFunction is an aggregate // function template that allows to choose the aggregate function variant that // accepts either normal arguments or tuple argument. -template